Esempio n. 1
0
    def build_decoder_cell(self):
        """Create the attention-wrapped decoder cell and its initial state.

        When ``self.beam_search`` is set, the encoder outputs, the encoder
        state and the source lengths are first replicated ``self.beam_size``
        times with ``tile_batch``; the tiled outputs and state are written
        back to ``self.encoder_outputs`` and ``self.encoder_state``.

        Returns:
            A ``(decoder_cell, decoder_initial_state)`` tuple. The initial
            state is the wrapper's zero state with its ``cell_state``
            replaced by ``self.encoder_state``.
        """
        source_lengths = self.encoder_inputs_length

        if self.beam_search:
            print("use beamsearch decoding..")

            def _tile(state_part):
                return tile_batch(state_part, self.beam_size)

            self.encoder_outputs = tile_batch(
                self.encoder_outputs, multiplier=self.beam_size)
            self.encoder_state = nest.map_structure(_tile, self.encoder_state)
            source_lengths = tile_batch(
                source_lengths, multiplier=self.beam_size)

        # Attention mechanism that reads the (possibly tiled) encoder outputs.
        mechanism = BahdanauAttention(
            num_units=self.rnn_size,
            memory=self.encoder_outputs,
            memory_sequence_length=source_lengths)

        # Base decoder RNN cell, then wrapped with the attention wrapper.
        wrapped_cell = AttentionWrapper(
            cell=self.create_rnn_cell(),
            attention_mechanism=mechanism,
            attention_layer_size=self.rnn_size,
            name='Attention_Wrapper')

        # With beam search every example occupies beam_size rows.
        if self.beam_search:
            effective_batch = self.batch_size * self.beam_size
        else:
            effective_batch = self.batch_size

        zero_state = wrapped_cell.zero_state(
            batch_size=effective_batch, dtype=tf.float32)
        initial_state = zero_state.clone(cell_state=self.encoder_state)

        return wrapped_cell, initial_state
 def _build_infer(self, config):
     """Build the beam-search inference decoder (skipped for flat_baseline).

     Stores the decoded ids in ``self.preds`` and the final beam
     log-probabilities in ``self.scores``. With ``config.use_att`` the LSTM
     is wrapped in Bahdanau attention over the tiled context; otherwise the
     bare ``self.lstm`` is decoded from the tiled final encoder vector.
     """
     beam = config.beam_width

     # Both code paths tile the context and its lengths. Only the attention
     # path consumes them, but they are created in either case so the graph
     # contains the same ops as before.
     tiled_inputs = tile_batch(self.xx_context, multiplier=beam)
     tiled_sequence_length = tile_batch(self.x_seq_length, multiplier=beam)

     if config.use_att:  # with att
         tiled_first_attention = tile_batch(self.first_attention,
                                            multiplier=beam)
         attention_mechanism = BahdanauAttention(
             config.decode_size,
             memory=tiled_inputs,
             memory_sequence_length=tiled_sequence_length)
         tiled_final = tile_batch(self.xx_final, beam)
         # The same tiled vector is used for both slots of the LSTM state.
         lstm_state = rnn.LSTMStateTuple(tiled_final, tiled_final)
         decoder_cell = AttentionWrapper(self.lstm,
                                         attention_mechanism,
                                         output_attention=False)
         zero_state = decoder_cell.zero_state(
             dtype=tf.float32,
             batch_size=config.test_batch_size * beam)
         initial_state = zero_state.clone(cell_state=lstm_state,
                                          attention=tiled_first_attention)
     else:  # without att
         tiled_final = tile_batch(self.xx_final, beam)
         initial_state = rnn.LSTMStateTuple(tiled_final, tiled_final)
         decoder_cell = self.lstm

     infer_decoder = BeamSearchDecoder(
         decoder_cell,
         embedding=self.label_embeddings,
         start_tokens=[config.GO] * config.test_batch_size,
         end_token=config.EOS,
         initial_state=initial_state,
         beam_width=beam,
         output_layer=self.output_l)
     outputs, final_state, _ = dynamic_decode(
         infer_decoder, maximum_iterations=config.max_seq_length)
     self.preds = outputs.predicted_ids
     self.scores = final_state.log_probs
Esempio n. 3
0
    def setup_decoder_cell(self, config, keep_prob, use_beam_search,
                           init_state, attention_states, attention_lengths):
        """Build the (multi-layer) attention decoder cell and its initial state.

        Args:
            config: configuration object; this method reads
                ``attention_type``, ``use_attention_input_feeding``,
                ``hidden_size`` and ``top_attention`` from it.
            keep_prob: forwarded to ``_setup_decoder_cell``.
            use_beam_search: when truthy, the attention memory, the initial
                state and the memory lengths are tiled ``self.beam_width``
                times and the batch size is scaled accordingly. It is also
                passed as ``alignment_history`` to the attention wrapper.
            init_state: per-layer initial decoder state.
            attention_states: memory the attention mechanism attends over.
            attention_lengths: lengths used to mask the memory.

        Returns:
            A ``(cells, init_state)`` tuple.
        """
        batch_size = get_state_shape(init_state)[0]
        if use_beam_search:
            width = self.beam_width
            attention_states = tile_batch(attention_states, multiplier=width)
            init_state = nest.map_structure(
                lambda part: tile_batch(part, self.beam_width), init_state)
            attention_lengths = tile_batch(attention_lengths, multiplier=width)
            batch_size = batch_size * width

        attention_size = shape(attention_states, -1)
        # The mechanism class is looked up by name in tf.contrib.seq2seq.
        mechanism_cls = getattr(tf.contrib.seq2seq, config.attention_type)
        attention = mechanism_cls(
            attention_size,
            attention_states,
            memory_sequence_length=attention_lengths)

        def _feed_attention(cell_inputs, context):
            # Cell input function that keeps input/output dimensions equal.
            if config.use_attention_input_feeding:
                projection = tf.layers.Dense(config.hidden_size,
                                             dtype=tf.float32,
                                             name='attn_input_feeding',
                                             activation=self.activation)
                return projection(tf.concat([cell_inputs, context], axis=-1))
            return cell_inputs

        cells = _setup_decoder_cell(config, keep_prob)

        if config.top_attention:
            # Attention is applied on the top decoder layer only.
            cells[-1] = AttentionWrapper(
                cells[-1],
                attention_mechanism=attention,
                name="AttentionWrapper",
                attention_layer_size=config.hidden_size,
                alignment_history=use_beam_search,
                initial_cell_state=init_state[-1],
                cell_input_fn=_feed_attention)
            layer_states = list(init_state)
            layer_states[-1] = cells[-1].zero_state(batch_size=batch_size,
                                                    dtype=tf.float32)
            init_state = tuple(layer_states)
            cells = MultiRNNCell(cells)
            return cells, init_state

        # Otherwise the whole stack is wrapped by a single attention wrapper.
        stacked = AttentionWrapper(MultiRNNCell(cells),
                                   attention_mechanism=attention,
                                   name="AttentionWrapper",
                                   attention_layer_size=config.hidden_size,
                                   alignment_history=use_beam_search,
                                   initial_cell_state=init_state,
                                   cell_input_fn=_feed_attention)
        zero_state = stacked.zero_state(batch_size=batch_size,
                                        dtype=tf.float32)
        return stacked, zero_state.clone(cell_state=init_state)
    def _create_decoder_cell(self):
        """Build the attention decoder cell stack and its initial state.

        Reads ``self.enc_outputs``, ``self.enc_states`` and
        ``self.enc_seq_len`` as attention memory, initial state and memory
        lengths. ``self.cfg.attention == "luong"`` selects Luong attention;
        any other value falls back to Bahdanau attention.

        Returns:
            A ``(cells, dec_init_states)`` tuple.
        """
        enc_outputs = self.enc_outputs
        enc_states = self.enc_states
        enc_seq_len = self.enc_seq_len

        if self.use_beam_search:
            batch_size = self.batch_size * self.cfg.beam_size
        else:
            batch_size = self.batch_size

        with tf.variable_scope("attention"):
            # Luong when requested, Bahdanau by default.
            if self.cfg.attention == "luong":
                mechanism_cls = LuongAttention
            else:
                mechanism_cls = BahdanauAttention
            attention_mechanism = mechanism_cls(
                num_units=self.cfg.num_units,
                memory=enc_outputs,
                memory_sequence_length=enc_seq_len)

        def _feed_attention(cell_inputs, context):
            # Cell input function that keeps input/output dimensions equal.
            # reference: https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/AttentionWrapper
            if self.cfg.use_attention_input_feeding:
                projection = tf.layers.Dense(self.cfg.num_units,
                                             dtype=tf.float32,
                                             name='attn_input_feeding')
                return projection(tf.concat([cell_inputs, context], axis=-1))
            return cell_inputs

        if self.cfg.top_attention:
            # Attention is applied on the top decoder layer only.
            layer_cells = [
                self._create_rnn_cell() for _ in range(self.cfg.num_layers)
            ]
            layer_cells[-1] = AttentionWrapper(
                layer_cells[-1],
                attention_mechanism=attention_mechanism,
                name="Attention_Wrapper",
                attention_layer_size=self.cfg.num_units,
                initial_cell_state=enc_states[-1],
                cell_input_fn=_feed_attention)
            layer_states = list(enc_states)
            layer_states[-1] = layer_cells[-1].zero_state(
                batch_size=batch_size, dtype=tf.float32)
            dec_init_states = tuple(layer_states)
            cells = MultiRNNCell(layer_cells)
        else:
            # The whole stack is wrapped by a single attention wrapper.
            stack = MultiRNNCell(
                [self._create_rnn_cell() for _ in range(self.cfg.num_layers)])
            cells = AttentionWrapper(stack,
                                     attention_mechanism=attention_mechanism,
                                     name="Attention_Wrapper",
                                     attention_layer_size=self.cfg.num_units,
                                     initial_cell_state=enc_states,
                                     cell_input_fn=_feed_attention)
            zero_state = cells.zero_state(batch_size=batch_size,
                                          dtype=tf.float32)
            dec_init_states = zero_state.clone(cell_state=enc_states)
        return cells, dec_init_states
Esempio n. 5
0
    def build_decoder_cell(self):
        """Build the Bahdanau-attention decoder cell and its initial state.

        With ``self.beam_search`` enabled, the encoder outputs, encoder state
        and encoder input lengths are tiled ``self.beam_size`` times, and the
        tiled outputs/state overwrite ``self.encoder_outputs`` and
        ``self.encoder_state``.

        Returns:
            ``(decoder_cell, decoder_initial_state)``, where the initial state
            is the attention wrapper's zero state with its cell state set to
            the encoder state.
        """
        memory_lengths = self.encoder_inputs_length  # encoder input lengths

        if self.beam_search:
            print("use beamsearch decoding..")
            # Beam search needs the encoder results tiled: tile_batch
            # replicates its first argument `multiplier` (here beam_size)
            # times.
            self.encoder_outputs = tile_batch(
                self.encoder_outputs, multiplier=self.beam_size)

            # nest.map_structure applies the function to every element of
            # the structure; it is used because the encoder state may be a
            # nested structure (e.g. the c and h parts of an LSTM state).
            def _tile_state(part):
                return tile_batch(part, self.beam_size)

            self.encoder_state = nest.map_structure(
                _tile_state, self.encoder_state)
            memory_lengths = tile_batch(
                memory_lengths, multiplier=self.beam_size)

        # Attention mechanism: Bahdanau attention, see
        #   Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio.
        #   "Neural Machine Translation by Jointly Learning to Align and
        #   Translate." ICLR 2015. https://arxiv.org/abs/1409.0473
        # A normalized variant is available by passing normalize=True; for
        # the normalization itself see
        #   Tim Salimans, Diederik P. Kingma.
        #   "Weight Normalization: A Simple Reparameterization to Accelerate
        #   Training of Deep Neural Networks."
        #   https://arxiv.org/abs/1602.07868
        bahdanau = BahdanauAttention(
            num_units=self.rnn_size,  # hidden-layer dimension
            memory=self.encoder_outputs,  # usually the encoder outputs
            # mask for the memory: positions past the length are ignored
            memory_sequence_length=memory_lengths)

        # Decoder RNN cell, then wrapped so that it attends over the memory.
        base_cell = self.create_rnn_cell()
        attentive_cell = AttentionWrapper(
            cell=base_cell,  # the RNN cell being wrapped
            attention_mechanism=bahdanau,  # AttentionMechanism instance
            # TODO: is this the RNN state dimension after attention wrapping?
            attention_layer_size=self.rnn_size,
            name='Attention_Wrapper')  # name of the wrapper

        # With beam search, batch_size = self.batch_size * self.beam_size.
        if self.beam_search:
            rows = self.batch_size * self.beam_size
        else:
            rows = self.batch_size

        # zero_state() gives a zero-initialised wrapper state; clone() then
        # overrides the given fields. Here the encoder state becomes the
        # decoder's initial cell state.
        blank_state = attentive_cell.zero_state(
            batch_size=rows, dtype=tf.float32)
        start_state = blank_state.clone(cell_state=self.encoder_state)

        return attentive_cell, start_state
def create_attention(decoding_cell, encoding_op, encoding_st, fr_len):
    """Wrap ``decoding_cell`` with Luong attention over the encoder outputs.

    Reads the module-level names ``args``, ``hidden_size`` and ``batch_size``.

    Args:
        decoding_cell: RNN cell to wrap.
        encoding_op: memory passed to ``LuongAttention``.
        encoding_st: state cloned into the wrapper's zero state as
            ``cell_state``.
        fr_len: memory sequence lengths passed to ``LuongAttention``.

    Returns:
        ``(decoding_cell, attention_zero_state)`` when
        ``args.attention_option`` equals ``"Luong"``. For any other option the
        visible code falls through and returns ``None``.
    """
    # Compare by value. The previous `is "Luong"` tested object identity,
    # which is normally False for a string that was parsed at runtime (e.g.
    # from the command line), so the attention branch was silently skipped.
    if args.attention_option == "Luong":
        print("Attention is all I need.")
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
            hidden_size, encoding_op, fr_len)
        decoding_cell = AttentionWrapper(decoding_cell, attention_mechanism,
                                         hidden_size)
        attention_zero_state = decoding_cell.zero_state(batch_size, tf.float32)
        attention_zero_state = attention_zero_state.clone(
            cell_state=encoding_st)
        return decoding_cell, attention_zero_state
Esempio n. 7
0
    def __graph__(self):
        """Assemble the model graph: encoder, attention decoder, mode head.

        In ``'train'`` mode this sets ``self.decoder_outputs``, ``self.loss``,
        ``self.summary_op`` and ``self.train_op``; in ``'decode'`` mode it
        sets ``self.decoder_predict_decode``. Any other mode only builds the
        shared encoder/decoder pieces.
        """
        # encoder
        encoder_outputs, encoder_state = self.encoder()

        # decoder
        with tf.variable_scope('decoder'):
            memory_lengths = self.encoder_inputs_length
            if self.beam_search:
                # Beam search needs the encoder results tiled, i.e.
                # replicated beam_size times.
                print("use beamsearch decoding..")
                encoder_outputs = tile_batch(
                    encoder_outputs, multiplier=self.beam_size)

                def _tile_state(s):
                    return tf.contrib.seq2seq.tile_batch(s, self.beam_size)

                encoder_state = nest.map_structure(_tile_state, encoder_state)
                memory_lengths = tile_batch(
                    memory_lengths, multiplier=self.beam_size)

            # Attention mechanism over the (possibly tiled) encoder outputs.
            attention_mechanism = BahdanauAttention(
                num_units=self.rnn_size,
                memory=encoder_outputs,
                memory_sequence_length=memory_lengths)

            # Decoder RNN cell, wrapped with the attention wrapper.
            decoder_cell = AttentionWrapper(
                cell=self.create_rnn_cell(),
                attention_mechanism=attention_mechanism,
                attention_layer_size=self.rnn_size,
                name='Attention_Wrapper')

            # With beam search, batch_size = self.batch_size * self.beam_size.
            if self.beam_search:
                batch_size = self.batch_size * self.beam_size
            else:
                batch_size = self.batch_size

            # Initial decoder state: zero state whose cell state is taken
            # directly from the encoder's final state.
            zero_state = decoder_cell.zero_state(
                batch_size=batch_size, dtype=tf.float32)
            decoder_initial_state = zero_state.clone(cell_state=encoder_state)

            output_layer = tf.layers.Dense(
                self.vocab_size,
                kernel_initializer=tf.truncated_normal_initializer(
                    mean=0.0, stddev=0.1))

            if self.mode == 'train':
                self.decoder_outputs = self.decoder_train(
                    decoder_cell, decoder_initial_state, output_layer)

                # loss
                self.loss = sequence_loss(logits=self.decoder_outputs,
                                          targets=self.decoder_targets,
                                          weights=self.mask)

                # summary
                tf.summary.scalar('loss', self.loss)
                self.summary_op = tf.summary.merge_all()

                # optimizer with global-norm gradient clipping
                # (`learing_rate` is the attribute name this method reads)
                optimizer = tf.train.AdamOptimizer(self.learing_rate)
                variables = tf.trainable_variables()
                raw_grads = tf.gradients(self.loss, variables)
                clipped_grads, _ = tf.clip_by_global_norm(
                    raw_grads, self.max_gradient_norm)
                self.train_op = optimizer.apply_gradients(
                    zip(clipped_grads, variables))
            elif self.mode == 'decode':
                self.decoder_predict_decode = self.decoder_decode(
                    decoder_cell, decoder_initial_state, output_layer)
Esempio n. 8
0
def apply_attention(cell_dec,
                    enc_hidden_states,
                    enc_final_state,
                    input_length,
                    batch_size,
                    attention_probability_fn,
                    dropout,
                    alignment_history=True):
    """Wrap ``cell_dec`` with Luong attention, a tanh activation and dropout.

    Args:
        cell_dec: decoder cell to wrap; its ``output_size`` is used both as
            the attention depth and as the attention layer size.
        enc_hidden_states: memory the attention mechanism attends over.
        enc_final_state: passed as ``initial_cell_state`` to the wrapper.
        input_length: memory sequence lengths.
        batch_size: batch size used for the wrapper's zero state.
        attention_probability_fn: one of ``'softmax'``, ``'hardmax'`` or
            ``'sparsemax'``.
        dropout: passed as ``output_keep_prob`` to the dropout wrapper.
        alignment_history: forwarded to ``AttentionWrapper``.

    Returns:
        ``(cell_dec, initial_state)`` where ``initial_state`` is the zero
        state of the attention wrapper.

    Raises:
        ValueError: if ``attention_probability_fn`` is not a known name.
    """

    def _sparsemax_with_checks(scores):
        probs = tf.contrib.sparsemax.sparsemax(scores)
        range_checks = [
            tf.assert_non_negative(probs),
            tf.assert_less_equal(probs, 1., summarize=60),
        ]
        with tf.control_dependencies(range_checks):
            return tf.identity(probs)

    def _ignore_attention(inputs, _):
        # The cell input is passed through without the attention context.
        return inputs

    if attention_probability_fn == 'softmax':
        probability_fn, score_mask_value = tf.nn.softmax, float('-inf')
    elif attention_probability_fn == 'hardmax':
        probability_fn = tf.contrib.seq2seq.hardmax
        score_mask_value = float('-inf')
    elif attention_probability_fn == 'sparsemax':
        probability_fn = _sparsemax_with_checks
        # sparsemax does not deal with -inf properly, and has significant
        # numerical stability issues with large numbers (positive or
        # negative), hence a finite mask value.
        score_mask_value = -1e+5
    else:
        raise ValueError("Invalid attention_probability_fn {}".format(
            str(attention_probability_fn)))

    with tf.variable_scope(
            'attention',
            initializer=tf.initializers.identity(dtype=tf.float32)):
        attention_depth = int(cell_dec.output_size)
        attention = LuongAttention(attention_depth,
                                   enc_hidden_states,
                                   memory_sequence_length=input_length,
                                   probability_fn=probability_fn,
                                   score_mask_value=score_mask_value)

    attentive_cell = AttentionWrapper(
        cell_dec,
        attention,
        cell_input_fn=_ignore_attention,
        attention_layer_size=int(cell_dec.output_size),
        alignment_history=alignment_history,
        initial_cell_state=enc_final_state)
    initial_state = attentive_cell.zero_state(batch_size, dtype=tf.float32)

    activated_cell = ActivationWrapper(attentive_cell, activation=tf.tanh)
    final_cell = NotBrokenDropoutWrapper(activated_cell,
                                         output_keep_prob=dropout)

    return final_cell, initial_state
Esempio n. 9
0
    def __init__(self, hidden_size, keep_prob, attention_mechanism=None, name="RNNEncoder"):
        """Create the forward and backward GRU cells of the encoder.

        Args:
            hidden_size: number of units of each GRU cell.
            keep_prob: input keep probability for the dropout wrappers.
            attention_mechanism: optional; when given, each GRU cell is
                wrapped in an ``AttentionWrapper`` using this same mechanism
                before dropout is applied.
            name: variable scope name, also stored as ``self.name``.
        """
        with vs.variable_scope(name):
            self.hidden_size = hidden_size
            self.keep_prob = keep_prob

            def _make_direction_cell():
                # GRU -> (optional attention) -> input dropout
                cell = rnn_cell.GRUCell(self.hidden_size)
                if attention_mechanism is not None:
                    cell = AttentionWrapper(cell, attention_mechanism)
                return DropoutWrapper(cell, input_keep_prob=self.keep_prob)

            self.rnn_cell_fw = _make_direction_cell()
            self.rnn_cell_bw = _make_direction_cell()
            self.name = name
 def _build_train(self, config):
     """Build the training logits and store them in ``self.logits``.

     The two flat models (``"fasttext_flat"`` and ``"RCNN_flat"``) use a
     single linear layer with ``config.fn_classes`` outputs. Every other
     model runs a teacher-forced decoder over ``self.yy``, with Bahdanau
     attention when ``config.use_att`` is set.
     """
     # decode
     model_name = config.model_name
     if model_name == "fasttext_flat" or model_name == "RCNN_flat":
         if model_name == "fasttext_flat":
             features = self.first_attention
         else:
             features = self.xx_final
         self.logits = tf.contrib.layers.fully_connected(
             features, config.fn_classes, activation_fn=None)
         print("logits:", self.logits.get_shape())
         self.logits = tf.reshape(self.logits, [-1, config.fn_classes])
         return

     # The same vector is used for both slots of the LSTM state.
     encoder_state = rnn.LSTMStateTuple(self.xx_final, self.xx_final)
     if config.use_att:
         attention_mechanism = BahdanauAttention(
             config.decode_size,
             memory=self.xx_context,
             memory_sequence_length=self.x_seq_length)
         cell = AttentionWrapper(self.lstm,
                                 attention_mechanism,
                                 output_attention=False)
         zero_state = cell.zero_state(dtype=tf.float32,
                                      batch_size=config.batch_size)
         initial_state = zero_state.clone(cell_state=encoder_state,
                                          attention=self.first_attention)
     else:
         cell = self.lstm
         initial_state = encoder_state

     train_helper = TrainingHelper(self.yy, self.y_seq_length)
     train_decoder = BasicDecoder(cell,
                                  train_helper,
                                  initial_state,
                                  output_layer=self.output_l)
     self.decoder_outputs_train, _, _ = dynamic_decode(
         train_decoder, impute_finished=True)
     self.logits = self.decoder_outputs_train.rnn_output
     print("logits:", self.logits.get_shape())
def run_match_lstm(encoded_rep, masks, size):
    """Run a bidirectional match-LSTM of the passage attending over the question.

    Args:
        encoded_rep: pair ``(encoded_question, encoded_passage)``.
        masks: pair ``(masks_question, masks_passage)``; the question mask is
            used as the attention memory length, the passage mask as the
            sequence length for reversing.
        size: number of units of the LSTM cell.

    Returns:
        Forward and (re-reversed) backward attender outputs concatenated on
        the last axis; per the original note, shape (-1, P, 2*H).
    """
    encoded_question, encoded_passage = encoded_rep
    masks_question, masks_passage = masks

    def _concat_input_and_attention(curr_input, state):
        # Cell input = current input concatenated with the attention value.
        return tf.concat([curr_input, state], axis=-1)

    query_depth = encoded_question.get_shape()[-1]

    with tf.variable_scope("match_lstm_attender"):
        attention_mechanism_match_lstm = BahdanauAttention(
            query_depth,
            encoded_question,
            memory_sequence_length=masks_question)
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=size,
                                                 state_is_tuple=True)
        # output_attention is False because we want the cell output, not the
        # attention values.
        lstm_attender = AttentionWrapper(
            lstm_cell,
            attention_mechanism_match_lstm,
            output_attention=False,
            cell_input_fn=_concat_input_and_attention)

        # we don't mask the passage because masking the memories will be
        # handled by the pointerNet
        passage_reversed = _reverse(encoded_passage, masks_passage, 1, 0)

        # Both directions run the same attender cell under scope "rnn".
        forward_out, _ = tf.nn.dynamic_rnn(lstm_attender,
                                           encoded_passage,
                                           dtype=tf.float32,
                                           scope="rnn")
        backward_out, _ = tf.nn.dynamic_rnn(lstm_attender,
                                            passage_reversed,
                                            dtype=tf.float32,
                                            scope="rnn")

        # Put the backward outputs back into passage order.
        backward_out = _reverse(backward_out, masks_passage, 1, 0)

    return tf.concat([forward_out, backward_out], axis=-1)
Esempio n. 12
0
    def __init__(
            self,
            memory,
            memory_sequence_length=None,
            cell=None,
            cell_dropout_mode=None,
            vocab_size=None,
            output_layer=None,
            # attention_layer=None,  # TODO(zhiting): only valid for tf>=1.0
            cell_input_fn=None,
            hparams=None):
        """Set up the decoder and wrap its cell with an attention mechanism.

        The attention mechanism is created from ``hparams['attention']``
        (``type`` and ``kwargs``), with ``memory`` and
        ``memory_sequence_length`` added to the kwargs. The resulting kwargs
        are kept in ``self._attn_kwargs``, the wrapper options in
        ``self._attn_cell_kwargs``, and ``self._cell`` is replaced by the
        ``AttentionWrapper`` around the original cell.
        """
        RNNDecoderBase.__init__(self, cell, vocab_size, output_layer,
                                cell_dropout_mode, hparams)

        attn_hparams = self._hparams['attention']
        mechanism_kwargs = attn_hparams['kwargs'].todict()

        # Resolve 'probability_fn' to a callable when it is given as
        # something else (None is left untouched).
        if 'probability_fn' in mechanism_kwargs:
            prob_fn = mechanism_kwargs['probability_fn']
            if not (prob_fn is None or callable(prob_fn)):
                search_modules = [
                    'tensorflow.nn', 'tensorflow.contrib.sparsemax',
                    'tensorflow.contrib.seq2seq'
                ]
                prob_fn = utils.get_function(prob_fn, search_modules)
            mechanism_kwargs['probability_fn'] = prob_fn

        mechanism_kwargs["memory_sequence_length"] = memory_sequence_length
        mechanism_kwargs["memory"] = memory
        self._attn_kwargs = mechanism_kwargs

        attn_modules = ['tensorflow.contrib.seq2seq', 'texar.tf.custom']
        # Use variable_scope to ensure all trainable variables created in
        # the attention mechanism are collected
        with tf.variable_scope(self.variable_scope):
            attention_mechanism = utils.check_or_get_instance(
                attn_hparams["type"],
                mechanism_kwargs,
                attn_modules,
                classtype=tf.contrib.seq2seq.AttentionMechanism)

        wrapper_option_names = (
            "attention_layer_size",
            "alignment_history",
            "output_attention",
        )
        self._attn_cell_kwargs = {
            option: attn_hparams[option] for option in wrapper_option_names
        }
        self._cell_input_fn = cell_input_fn

        # Use variable_scope to ensure all trainable variables created in
        # AttentionWrapper are collected
        with tf.variable_scope(self.variable_scope):
            # TODO(zhiting): an explicit `attention_layer` argument (which
            # would set "attention_layer_size" to None) is not wired in yet.
            self._cell = AttentionWrapper(
                self._cell,
                attention_mechanism,
                cell_input_fn=self._cell_input_fn,
                **self._attn_cell_kwargs)
Esempio n. 13
0
    def _get_beam_search_cell(self, beam_width):
        """Return an attention cell whose memory is tiled for beam search.

        A shallow copy of ``self._attn_kwargs`` gets its ``memory`` (and
        ``memory_sequence_length``, unless it is None) tiled ``beam_width``
        times. A new attention mechanism and ``AttentionWrapper`` are then
        built around the inner cell of ``self._cell`` inside
        ``self.variable_scope`` with ``reuse=True``. The new cell is also
        stored in ``self._beam_search_cell``.
        """
        with tf.variable_scope(self.variable_scope, reuse=True):
            tiled_kwargs = copy.copy(self._attn_kwargs)

            tiled_kwargs['memory'] = tile_batch(tiled_kwargs['memory'],
                                                multiplier=beam_width)

            seq_length = tiled_kwargs['memory_sequence_length']
            if seq_length is not None:
                tiled_kwargs['memory_sequence_length'] = tile_batch(
                    seq_length, beam_width)

            mechanism = utils.check_or_get_instance(
                self._hparams.attention.type,
                tiled_kwargs,
                ['tensorflow.contrib.seq2seq', 'texar.tf.custom'],
                classtype=tf.contrib.seq2seq.AttentionMechanism)

            # Wrap the inner (un-wrapped) cell of the existing attention cell.
            beam_cell = AttentionWrapper(self._cell._cell,
                                         mechanism,
                                         cell_input_fn=self._cell_input_fn,
                                         **self._attn_cell_kwargs)
            self._beam_search_cell = beam_cell

            return beam_cell
Esempio n. 14
0
    def add_decoder_op(self, enc_final_state, enc_hidden_states, output_embed_matrix, training):
        """Build the beam-search-optimization decoder and return its decoded outputs.

        Args:
            enc_final_state: final encoder state; it is repacked to match the
                decoder cell's ``state_size`` and used as the decoder's
                initial state.
            enc_hidden_states: encoder outputs; on the currently reachable
                path only the static size of its last dimension is read.
            output_embed_matrix: passed straight to
                ``BeamSearchOptimizationDecoder`` (presumably the output-token
                embedding matrix -- confirm against that class).
            training: forwarded to the decoder; when truthy, the gold sequence
                and its length (plus one) are supplied as well.

        Returns:
            The first element of the tuple returned by
            ``tf.contrib.seq2seq.dynamic_decode`` (run with
            ``output_time_major=True``).

        Raises:
            AssertionError: if the encoder and decoder hidden sizes differ
                (only while assertions are enabled).
            NotImplementedError: if ``self.config.use_grammar_constraints``
                is set.
        """
        cell_dec = tf.contrib.rnn.MultiRNNCell([self.make_rnn_cell(i, for_decoder=True) for i in range(self.config.rnn_layers)])

        encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
        decoder_hidden_size = int(cell_dec.output_size)
        
        # if encoder and decoder have different sizes, add a projection layer
        if encoder_hidden_size != decoder_hidden_size:
            # NOTE(review): the unconditional assert disables the projection
            # path below; it only becomes reachable when Python runs with
            # assertions stripped (-O).
            assert False, (encoder_hidden_size, decoder_hidden_size)
            with tf.variable_scope('hidden_projection'):
                kernel = tf.get_variable('kernel', (encoder_hidden_size, decoder_hidden_size), dtype=tf.float32)
            
                # apply a relu to the projection for good measure
                enc_final_state = nest.map_structure(lambda x: tf.nn.relu(tf.matmul(x, kernel)), enc_final_state)
                # NOTE(review): this contracts axis 2 of the states with axis 1
                # of `kernel` (decoder_hidden_size), whereas the matmul above
                # contracts with axis 0 (encoder_hidden_size) -- looks
                # inconsistent, confirm before re-enabling this branch.
                enc_hidden_states = tf.nn.relu(tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))
        else:
            # flatten and repack the state
            enc_final_state = nest.pack_sequence_as(cell_dec.state_size, nest.flatten(enc_final_state))

        # to use these we need to tile the final encoder state / the memory
        # but that conflicts with our use of cell_dec on untiled inputs for the gold
        #cell_dec = ParentFeedingCellWrapper(cell_dec, tf.contrib.seq2seq.tile_batch(enc_final_state, self.config.beam_size))
        # NOTE(review): `and False` keeps the attention wrapper switched off
        # regardless of config.apply_attention.
        if self.config.apply_attention and False:
            attention = LuongAttention(decoder_hidden_size, enc_hidden_states, self.input_length_placeholder,
                                       probability_fn=tf.nn.softmax)
            cell_dec = AttentionWrapper(cell_dec, attention,
                                        cell_input_fn=lambda inputs, _: inputs,
                                        attention_layer_size=decoder_hidden_size,
                                        initial_cell_state=enc_final_state)
            enc_final_state = cell_dec.zero_state(self.batch_size, dtype=tf.float32)
        
        # debug output of the state handed to the decoder
        print('enc_final_state', enc_final_state)
        linear_layer = tf_core_layers.Dense(self.config.output_size)
        # one start token (config.grammar.start) per batch entry
        go_vector = tf.ones((self.batch_size,), dtype=tf.int32) * self.config.grammar.start
        decoder = BeamSearchOptimizationDecoder(training, cell_dec, output_embed_matrix, go_vector, self.config.grammar.end,
                                                enc_final_state,
                                                beam_width=self.config.beam_size, output_layer=linear_layer,
                                                gold_sequence=self.output_placeholder if training else None,
                                                gold_sequence_length=(self.output_length_placeholder+1) if training else None)
        
        # Grammar constraints are rejected here, after the decoder object has
        # already been constructed.
        if self.config.use_grammar_constraints:
            raise NotImplementedError("Grammar constraints are not implemented for the beam search yet")
        
        # dynamic_decode fails if we pass output_time_major=False, as it tries
        # to transpose the loss vector
        final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, output_time_major=True, maximum_iterations=self.config.max_length)
        return final_outputs
Esempio n. 15
0
def attention_alignment(inputs, input_lengths, memory, memory_lengths, n_layers, n_units,
                        dropout_prob, cell_type=GRUCell, attention_mechanism=BahdanauAttention, is_training=True):
    """Run an attention-wrapped RNN over ``inputs`` while attending over ``memory``.

    Args:
        inputs (tensor):              Input sequence; its static shape must unpack into
                                      (batch, seq_length, dimension)
        input_lengths (tensor):       Lengths of the input sequences, handed to the training helper
        memory (tensor):              Sequence the attention mechanism attends over
        memory_lengths (tensor):      Lengths of the memory sequences
        n_layers (int):               Number of stacked RNN layers (a single cell when <= 1)
        n_units (int):                Number of units per RNN cell and of the attention mechanism
        dropout_prob (float):         Output dropout rate; forced to 0 when not training
        cell_type (method):           RNN cell constructor, GRU by default
        attention_mechanism (method): Attention mechanism constructor, Bahdanau by default
        is_training (bool):           Whether the model is training or testing

    Returns:
        (tensor, state, tensor): the decoder's ``rnn_output`` padded along axis 1 and reshaped
        to [batch, seq_length, dimension]; the final decoder state; and the stacked alignment
        history transposed with permutation [1, 0, 2].
    """
    # Static input shape: reused for the zero state, the decode limit, the padding
    # amount and the final reshape.
    batch_size, seq_length, dim = inputs.get_shape().as_list()

    # Attention over the memory.
    mechanism = attention_mechanism(n_units, memory, memory_sequence_length=memory_lengths, dtype=tf.float32)

    # Dropout is only active while training.
    keep_prob = 1 - (dropout_prob if is_training else 0)

    def _dropout_cell():
        return DropoutWrapper(cell_type(n_units), output_keep_prob=keep_prob)

    rnn_cell = MultiRNNCell([_dropout_cell() for _ in range(n_layers)]) if n_layers > 1 else _dropout_cell()

    # Wire the attention mechanism into the cell and keep the alignments.
    wrapped_cell = AttentionWrapper(rnn_cell, mechanism, alignment_history=True)

    # TODO: Do we ever feed an init state?
    initial_state = wrapped_cell.zero_state(batch_size, dtype=tf.float32)

    # Read the inputs while attending over the memory.
    decoder = BasicDecoder(
        wrapped_cell,
        TrainingHelper(inputs=inputs, sequence_length=input_lengths),
        initial_state)
    decoded, states, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder, maximum_iterations=seq_length, impute_finished=True)

    # Pad the time axis back up to seq_length, then pin the static shape.
    missing_steps = seq_length - tf.reduce_max(input_lengths)
    padded = tf.pad(decoded.rnn_output, [[0, 0], [0, missing_steps], [0, 0]])
    outputs = tf.reshape(padded, [batch_size, seq_length, dim])

    # Attention matrix, e.g. for visualizing a heatmap.
    aligned = tf.transpose(states.alignment_history.stack(), [1, 0, 2])
    return outputs, states, aligned
Esempio n. 16
0
    def build_decoder_cell(self, encoder_outputs, encoder_final_state,
                           hidden_size, cell_type, layer_size):
        """
        Build all decoder layers, wrap them with attention and create the initial state.

        :param encoder_outputs: encoder outputs, used as the attention memory
        :param encoder_final_state: final encoder state, copied into the decoder's initial cell state
        :param hidden_size: units per layer (doubled when ``self.bidirection`` is truthy)
        :param cell_type: cell type forwarded to ``self.one_cell``
        :param layer_size: number of stacked layers
        :return: ``(cell, decoder_initial_state)``
        """
        decoding = self.mode == 'decode'
        memory_lengths = self.encoder_inputs_length

        if decoding:
            # In decode mode every encoder-side tensor is tiled beam_width times.
            tile = tf.contrib.seq2seq.tile_batch
            encoder_outputs = tile(encoder_outputs, multiplier=self.beam_width)
            encoder_final_state = tile(encoder_final_state, multiplier=self.beam_width)
            memory_lengths = tile(memory_lengths, multiplier=self.beam_width)

        layer_units = hidden_size * 2 if self.bidirection else hidden_size
        cell = MultiRNNCell(
            [self.one_cell(layer_units, cell_type) for _ in range(layer_size)])

        # Attention mechanism over the (possibly tiled) encoder outputs.
        self.attention_mechanism = BahdanauAttention(
            num_units=self.hidden_size,
            memory=encoder_outputs,
            memory_sequence_length=memory_lengths)

        def cell_input_fn(inputs, attention):
            # Project [inputs; attention] to the width used for the stacked cells.
            out_units = self.hidden_size * (2 if self.bidirection else 1)
            projection = layers.Dense(out_units,
                                      dtype=tf.float32,
                                      use_bias=False,
                                      name='attention_cell_input_fn')
            return projection(array_ops.concat([inputs, attention], -1))

        cell = AttentionWrapper(cell=cell,
                                attention_mechanism=self.attention_mechanism,
                                attention_layer_size=self.hidden_size,
                                cell_input_fn=cell_input_fn,
                                name='Attention_Wrapper')

        # The zero state must match the tiled batch when decoding.
        state_batch_size = (self.batch_size * self.beam_width
                            if decoding else self.batch_size)
        zero_state = cell.zero_state(batch_size=state_batch_size,
                                     dtype=tf.float32)
        return cell, zero_state.clone(cell_state=encoder_final_state)
Esempio n. 17
0
def Decoder_LSTM(inputs, sequence_length, attention_mechanism, is_training=False):
    '''
    Build a stacked zoneout-LSTM attention decoder and run it.

    The original note states that in inference ``inputs`` and
    ``sequence_length`` will be ignored; that depends on ``Decoder_Helper``,
    which is not shown here. ``inputs`` is always used to derive the batch
    size of the initial state.

    Returns the final outputs and final state of the decode loop.
    '''
    lstm_settings = hp.Decoder.LSTM
    zoneout_cells = [
        ZoneoutLSTMCell(
            num_units=hp.Decoder.LSTM.Cell_Size,
            is_training=is_training,
            cell_zoneout_rate=hp.Decoder.LSTM.Zoneout_Rate,
            output_zoneout_rate=hp.Decoder.LSTM.Zoneout_Rate,
        )
        for _ in range(lstm_settings.Nums)
    ]
    stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(zoneout_cells)

    attention_cell = AttentionWrapper(
        cell=stacked_lstm,
        attention_mechanism=attention_mechanism,
        attention_layer_size=None,
        alignment_history=True,
        cell_input_fn=None,
        output_attention=False,
        initial_cell_state=None,
        name=None,
        attention_layer=None,
    )

    decode_helper = Decoder_Helper(
        inputs=inputs,  # Mel
        sequence_length=sequence_length,  # Mel_length
        time_major=False,
        is_training=is_training,
        name=None,
    )

    # Batch size is taken dynamically from the leading axis of the inputs.
    initial_state = attention_cell.zero_state(tf.shape(inputs)[0], tf.float32)
    decoder = Decoder_Decoder(
        cell=attention_cell,
        helper=decode_helper,
        initial_state=initial_state,
    )

    final_outputs, final_state, _ = Decoder_Dynamic_Decode(
        decoder=decoder,
        impute_finished=False,  # True
    )
    return final_outputs, final_state
Esempio n. 18
0
 def __call__(self, encoder_outputs, encoder_len, inputs, state):
     """Embed ``inputs``, run the GRU cell from ``state`` and project the output.

     Returns ``(output, state)``: the result of ``self.out_layer`` applied to
     the RNN output, and the RNN's final state.
     """
     # Look up embeddings and insert a new axis at position 1
     # (presumably a length-1 time axis -- TODO confirm against callers).
     step_inputs = tf.expand_dims(
         tf.nn.embedding_lookup(self.embedding, inputs), axis=1)

     # NOTE(review): the attention-wrapped cell below is constructed but never
     # run -- dynamic_rnn is given the bare self.gru_cell, so encoder_outputs
     # and encoder_len do not influence the returned values. Kept as-is because
     # switching cells would change the state structure callers must pass in.
     mechanism = BahdanauAttention(
         num_units=self.rnn_units,
         memory=encoder_outputs,
         memory_sequence_length=encoder_len)
     _unused_attention_cell = AttentionWrapper(self.gru_cell, mechanism)

     rnn_output, next_state = tf.nn.dynamic_rnn(
         self.gru_cell, step_inputs, initial_state=state, dtype=tf.float32)
     return self.out_layer(rnn_output), next_state
Esempio n. 19
0
 def __init__(self, cell, prenets: Tuple[PreNet],
              attention_mechanism,
              trainable=True, name=None, **kwargs):
     """Compose prenet -> attention -> concat wrappers around ``cell``.

     The resulting cell is stored in ``self._cell``.
     """
     super(AttentionRNN, self).__init__(trainable=trainable, name=name, **kwargs)

     def _inputs_only(inputs, attention):
         # Disable concatenation of inputs and context: feed the inputs unchanged.
         return inputs

     with_attention = AttentionWrapper(
         DecoderPreNetWrapper(cell, prenets),
         attention_mechanism,
         alignment_history=True,
         output_attention=False,
         cell_input_fn=_inputs_only)
     self._cell = ConcatOutputAndAttentionWrapper(with_attention)
Esempio n. 20
0
 def create_attention_cell(depth,
                           memory,
                           seq_len,
                           cell,
                           alignment_history=False):
     """Wrap ``cell`` with normalized Bahdanau attention over ``memory``.

     ``depth`` is the attention mechanism's first argument, ``seq_len`` the
     memory sequence lengths, and ``alignment_history`` is forwarded to the
     wrapper unchanged.
     """
     mechanism = BahdanauAttention(
         depth, memory, normalize=True, memory_sequence_length=seq_len)
     return AttentionWrapper(
         cell, mechanism, alignment_history=alignment_history)
Esempio n. 21
0
    def initialize(self,
                   enc_input,
                   sequence_length,
                   dec_input,
                   mel_target=None):
        """Build the encoder, the attention decoder and, when a target is given, the training ops.

        Relies on module-level names that are not defined in this method
        (``symbol_length``, ``embedding_dim``, ``decoder_dim``, ``mel_dim``,
        ``reduction``, ``pre_net``, ``CBHG``, ``ConcatWrapper``, ``MAE``).

        Args:
            enc_input: encoder input ids; its leading static dimension is used
                as the batch size for the final reshape.
            sequence_length: lengths forwarded to ``CBHG``.
            dec_input: decoder input, passed through ``pre_net``.
            mel_target: optional target. When it is not ``None`` the model is
                built in training mode and ``self.loss``, ``self.global_step``
                and ``self.optimize`` are created.

        Side effects:
            Stores ``enc_input``, ``sequence_length``, ``dec_input``,
            ``mel_output``, ``alignment`` and ``mel_target`` on ``self``.
        """
        # Training mode is inferred from the presence of a target.
        is_training = 1 if mel_target is not None else 0
        batch = enc_input.shape[0]

        # Encoder: embedding -> prenet -> CBHG.
        embedding = Embedding(symbol_length, embedding_dim)(enc_input)
        enc_pre = pre_net(embedding, is_training)
        enc_out = CBHG(enc_pre, sequence_length, K=16, conv_dim=[128, 128])

        dec_pre = pre_net(dec_input, is_training)

        # Attention GRU over the encoder output; alignments are kept so they
        # can be exposed via self.alignment.
        attention_cell = AttentionWrapper(GRUCell(decoder_dim),
                                          BahdanauAttention(
                                              decoder_dim, enc_out),
                                          alignment_history=True,
                                          output_attention=False)

        concat_cell = ConcatWrapper(attention_cell)

        attention_out, state = tf.nn.dynamic_rnn(concat_cell,
                                                 dec_pre,
                                                 dtype=tf.float32)
        # Stacked alignment history, permuted with [1, 2, 0].
        alignment = tf.transpose(state.alignment_history.stack(), [1, 2, 0])

        residual_gru_input = Dense(decoder_dim)(attention_out)

        # Two GRU layers, each added back onto its own input (residual).
        for _ in range(2):
            residual_gru_input += GRU(
                decoder_dim, return_sequences=True)(residual_gru_input)

        dec_out = Dense(mel_dim * reduction)(residual_gru_input)

        mel_output = tf.reshape(dec_out, [batch, -1, mel_dim])

        self.enc_input = enc_input
        self.sequence_length = sequence_length
        self.dec_input = dec_input
        self.mel_output = mel_output
        self.alignment = alignment
        self.mel_target = mel_target

        if is_training:
            self.loss = tf.reduce_mean(MAE(self.mel_target, self.mel_output))
            # The step counter is bookkeeping, not a model parameter: keep it
            # out of the trainable-variables collection that compute_gradients
            # differentiates against by default.
            self.global_step = tf.Variable(0, trainable=False)
            optimizer = tf.train.AdamOptimizer()
            gv = optimizer.compute_gradients(self.loss)
            self.optimize = optimizer.apply_gradients(
                gv, global_step=self.global_step)
Esempio n. 22
0
    def biLSTM_layer_op(self):
        """Build the attention-wrapped bidirectional LSTM and the tag projection.

        Both directions share one Bahdanau attention mechanism whose memory is
        ``self.word_embeddings``. The concatenated outputs go through dropout
        and a linear layer; the result is stored in ``self.logits``.
        """
        with tf.variable_scope("bi-lstm"):
            shared_attention = BahdanauAttention(
                num_units=self.hidden_dim, memory=self.word_embeddings)
            # Create both LSTM cells first, then wrap each with the shared attention.
            lstm_cells = [LSTMCell(self.hidden_dim) for _ in range(2)]
            forward_cell, backward_cell = [
                AttentionWrapper(cell=lstm, attention_mechanism=shared_attention)
                for lstm in lstm_cells
            ]

            directional_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=forward_cell,
                cell_bw=backward_cell,
                inputs=self.word_embeddings,
                sequence_length=self.sequence_lengths,
                dtype=tf.float32)
            forward_seq, backward_seq = directional_outputs
            output = tf.nn.dropout(
                tf.concat([forward_seq, backward_seq], axis=-1),
                self.dropout_pl)

        with tf.variable_scope("proj"):
            weights = tf.get_variable(
                name="W",
                shape=[2 * self.hidden_dim, self.num_tags],
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32)
            bias = tf.get_variable(
                name="b",
                shape=[self.num_tags],
                initializer=tf.zeros_initializer(),
                dtype=tf.float32)

            # Remember the dynamic shape before flattening so the second axis
            # can be restored afterwards.
            shape_before = tf.shape(output)
            flat = tf.reshape(output, [-1, 2 * self.hidden_dim])
            scores = tf.matmul(flat, weights) + bias

            self.logits = tf.reshape(
                scores, [-1, shape_before[1], self.num_tags])
Esempio n. 23
0
def pointer_net(inputs, input_lengths, n_pointers, word_matrix, cell_type, n_layers, n_units,
                dropout_prob, is_training=True):
    """Pointer network.

    Encodes ``inputs`` with an RNN, then greedily decodes with attention over
    the encoded sequence and returns the recorded attention alignments.

    Args:
        inputs (tensor):        Inputs to pointer network (typically output of previous RNN)
        input_lengths (tensor): Actual non-padded lengths of each input sequence
        n_pointers (int):       Number of pointers to generate (maximum decode iterations)
        word_matrix (tensor):   Embedding matrix of word vectors; its first dimension is
                                used as the output projection size
        cell_type (method):     Cell type to use
        n_layers (int):         Number of layers in RNN (same for encoder & decoder)
        n_units (int):          Number of units in RNN cell (same for encoder & decoder)
        dropout_prob (float):   Dropout probability
        is_training (bool):     Whether the model is training or testing

    Returns:
        tensor: stacked alignment history reshaped to [n_pointers, batch_size, seq_length]
    """
    batch_size, seq_length, _ = inputs.get_shape().as_list()
    vocab_size = word_matrix.get_shape().as_list()[0]

    def _dropout_cell():
        # Dropout is only applied during training.
        if is_training:
            keep = 1 - dropout_prob
        else:
            keep = 1
        return DropoutWrapper(cell_type(n_units), output_keep_prob=keep)

    def _build_rnn():
        # A single cell unless more than one layer is requested.
        if n_layers > 1:
            return MultiRNNCell([_dropout_cell() for _ in range(n_layers)])
        return _dropout_cell()

    # Encoder.
    encoder_cell = _build_rnn()
    encoded, _ = tf.nn.dynamic_rnn(encoder_cell, inputs, input_lengths, dtype=tf.float32)

    mechanism = BahdanauAttention(n_units, encoded, memory_sequence_length=input_lengths)
    # TODO: find permanent solution (InferenceHelper?)
    go_tokens = tf.constant(START_TOKEN, shape=[batch_size], dtype=tf.int32)
    greedy_helper = GreedyEmbeddingHelper(word_matrix, go_tokens, END_TOKEN)

    # Decoder: attention-wrapped RNN projected to the vocabulary size.
    decoder_cell = _build_rnn()
    attention_cell = AttentionWrapper(decoder_cell, mechanism, alignment_history=True)
    projected_cell = tf.contrib.rnn.OutputProjectionWrapper(attention_cell, vocab_size)
    initial_state = attention_cell.zero_state(batch_size, tf.float32)
    basic_decoder = BasicDecoder(projected_cell, greedy_helper, initial_state)

    _, final_state, _ = dynamic_decode(basic_decoder, maximum_iterations=n_pointers, impute_finished=True)
    return tf.reshape(final_state.alignment_history.stack(), [n_pointers, batch_size, seq_length])
Esempio n. 24
0
def decoder(x, decoder_inputs, keep_prob, sequence_length, memory,
            memory_length, first_attention):
    """Build a teacher-forced training decoder and a beam-search inference decoder.

    Both are created inside the ``"Decoder"`` variable scope and share the
    label embedding table, the LSTM cell object and the output projection.
    Relies on module-level names not defined in this function
    (``n_classes``, ``embedding_size``, ``n_hidden``, ``train_batch_size``,
    ``test_batch_size``, ``beam_width``, ``test_len``, ``GO``, ``EOS``).

    Args:
        x: tensor used for both components of the initial ``LSTMStateTuple``.
        decoder_inputs: ids looked up in the label embedding table for
            teacher forcing.
        keep_prob: dropout keep probability of the layer-norm LSTM cell.
        sequence_length: lengths handed to the training helper.
        memory: attention memory.
        memory_length: lengths of the attention memory.
        first_attention: attention value placed into the initial wrapper state.

    Returns:
        ``(decoder_outputs_train, decoder_outputs_infer, decoder_state_infer)``
    """
    with tf.variable_scope("Decoder"):
        # ---- shared pieces -------------------------------------------------
        label_table = tf.get_variable(name="embeddings",
                                      shape=[n_classes, embedding_size],
                                      dtype=tf.float32)
        teacher_inputs = tf.nn.embedding_lookup(label_table, decoder_inputs)
        lstm_cell = rnn.LayerNormBasicLSTMCell(n_hidden,
                                               dropout_keep_prob=keep_prob)
        projection = layers_core.Dense(n_classes, use_bias=True)

        # ---- training decoder ----------------------------------------------
        train_cell_state = rnn.LSTMStateTuple(x, x)
        train_attention = BahdanauAttention(
            embedding_size,
            memory=memory,
            memory_sequence_length=memory_length)
        train_cell = AttentionWrapper(lstm_cell,
                                      train_attention,
                                      output_attention=False)
        train_state = train_cell.zero_state(dtype=tf.float32,
                                            batch_size=train_batch_size)
        train_state = train_state.clone(cell_state=train_cell_state,
                                        attention=first_attention)
        train_decoder = BasicDecoder(
            train_cell,
            TrainingHelper(teacher_inputs, sequence_length),
            train_state,
            output_layer=projection)
        train_outputs, _, _ = dynamic_decode(train_decoder,
                                             impute_finished=True)

        # ---- beam-search decoder (everything tiled beam_width times) --------
        tiled_memory = tile_batch(memory, multiplier=beam_width)
        tiled_memory_length = tile_batch(memory_length,
                                         multiplier=beam_width)
        tiled_attention = tile_batch(first_attention,
                                     multiplier=beam_width)
        beam_attention = BahdanauAttention(
            embedding_size,
            memory=tiled_memory,
            memory_sequence_length=tiled_memory_length)
        tiled_x = tile_batch(x, beam_width)
        beam_cell_state = rnn.LSTMStateTuple(tiled_x, tiled_x)
        beam_cell = AttentionWrapper(lstm_cell,
                                     beam_attention,
                                     output_attention=False)
        beam_state = beam_cell.zero_state(
            dtype=tf.float32, batch_size=test_batch_size * beam_width)
        beam_state = beam_state.clone(cell_state=beam_cell_state,
                                      attention=tiled_attention)
        beam_decoder = BeamSearchDecoder(beam_cell,
                                         embedding=label_table,
                                         start_tokens=[GO] * test_len,
                                         end_token=EOS,
                                         initial_state=beam_state,
                                         beam_width=beam_width,
                                         output_layer=projection)
        infer_outputs, infer_state, _ = dynamic_decode(beam_decoder,
                                                       maximum_iterations=4)
        return train_outputs, infer_outputs, infer_state
Esempio n. 25
0
    def add_decoder_cell(self, encoder_outputs, encoder_states, hidden_size,
                         cell_type, num_layers):
        """Build the attention-wrapped decoder cell and its initial state.

        In ``'decode'`` mode the encoder outputs, states and source lengths
        are tiled ``self.beam_size`` times and the initial state is sized for
        ``beam_size * batch_size`` rows.

        Returns ``(decoder_cell, decoder_initial_state)``.
        """
        decoding = self.mode == 'decode'
        source_lengths = self.source_len

        if decoding:
            tile = tf.contrib.seq2seq.tile_batch
            encoder_outputs = tile(encoder_outputs, multiplier=self.beam_size)
            encoder_states = tile(encoder_states, multiplier=self.beam_size)
            source_lengths = tile(source_lengths, multiplier=self.beam_size)

        # Cells are twice as wide when the encoder is bidirectional.
        if self.bidirection:
            hidden_size_ = hidden_size * 2
        else:
            hidden_size_ = hidden_size
        layer_cells = [self.one_cell(hidden_size_, cell_type)
                       for _ in range(num_layers)]
        stacked_cell = MultiRNNCell(layer_cells)

        self.attention = BahdanauAttention(self.hidden_size, encoder_outputs,
                                           source_lengths)

        def cell_input_fn(inputs, attention):
            # Project [inputs; attention] down to the cell width.
            projection = tf.layers.Dense(hidden_size_,
                                         dtype=tf.float32,
                                         use_bias=False,
                                         name='att_proj')
            merged = tf.concat([inputs, attention], axis=-1)
            return projection(merged)

        decoder_cell = AttentionWrapper(cell=stacked_cell,
                                        attention_mechanism=self.attention,
                                        attention_layer_size=hidden_size,
                                        cell_input_fn=cell_input_fn,
                                        name='attentionwrapper')

        if decoding:
            state_rows = self.beam_size * self.batch_size
        else:
            state_rows = self.batch_size
        zero_state = decoder_cell.zero_state(batch_size=state_rows,
                                             dtype=tf.float32)
        return decoder_cell, zero_state.clone(cell_state=encoder_states)
Esempio n. 26
0
    def add_decoder_op(self, enc_final_state, enc_hidden_states,
                       output_embed_matrix, training):
        """Build the decoder cell stack and return the result of ``Seq2SeqDecoder.decode``.

        The decoder is a ``MultiRNNCell`` of ``self.config.rnn_layers`` cells
        wrapped in ``ParentFeedingCellWrapper`` and, when
        ``self.config.apply_attention`` is set, in an ``AttentionWrapper``
        using Luong attention over ``enc_hidden_states``.

        :param enc_final_state: final encoder state; repacked to match the
            decoder cell's state structure
        :param enc_hidden_states: encoder hidden states; the last dimension
            must equal the decoder output size
        :param output_embed_matrix: forwarded to ``Seq2SeqDecoder.decode``
        :param training: forwarded to ``Seq2SeqDecoder.decode``
        :return: whatever ``Seq2SeqDecoder.decode`` returns
        """
        cell_dec = tf.contrib.rnn.MultiRNNCell([
            self.make_rnn_cell(i, True) for i in range(self.config.rnn_layers)
        ])

        encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
        decoder_hidden_size = int(cell_dec.output_size)

        # if encoder and decoder have different sizes, add a projection layer
        if encoder_hidden_size != decoder_hidden_size:
            # Mismatched sizes are rejected here; with assertions enabled the
            # projection code below is never reached.
            assert False, (encoder_hidden_size, decoder_hidden_size)
            with tf.variable_scope('hidden_projection'):
                kernel = tf.get_variable(
                    'kernel', (encoder_hidden_size, decoder_hidden_size),
                    dtype=tf.float32)

                # apply a relu to the projection for good measure
                enc_final_state = nest.map_structure(
                    lambda x: tf.nn.relu(tf.matmul(x, kernel)),
                    enc_final_state)
                # NOTE(review): this contracts axis 2 of enc_hidden_states with
                # axis 1 of kernel (size decoder_hidden_size); axis 0 may have
                # been intended -- confirm before re-enabling this branch.
                enc_hidden_states = tf.nn.relu(
                    tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))
        else:
            # flatten and repack the state
            enc_final_state = nest.pack_sequence_as(
                cell_dec.state_size, nest.flatten(enc_final_state))

        cell_dec = ParentFeedingCellWrapper(cell_dec, enc_final_state)
        if self.config.apply_attention:
            attention = LuongAttention(self.config.decoder_hidden_size,
                                       enc_hidden_states,
                                       self.input_length_placeholder,
                                       probability_fn=tf.nn.softmax)
            # cell_input_fn drops the attention argument, so the cell sees only
            # its regular inputs.
            cell_dec = AttentionWrapper(
                cell_dec,
                attention,
                cell_input_fn=lambda inputs, _: inputs,
                attention_layer_size=self.config.decoder_hidden_size,
                initial_cell_state=enc_final_state)
            # The encoder state was given to the wrapper as initial_cell_state;
            # from here on the decoder starts from the wrapper's zero_state.
            enc_final_state = cell_dec.zero_state(self.batch_size,
                                                  dtype=tf.float32)
        decoder = Seq2SeqDecoder(self.config, self.input_placeholder,
                                 self.input_length_placeholder,
                                 self.output_placeholder,
                                 self.output_length_placeholder,
                                 self.batch_number_placeholder)
        return decoder.decode(cell_dec, enc_final_state,
                              self.config.grammar.output_size,
                              output_embed_matrix, training)
Esempio n. 27
0
 def __init__(self, cell, prenets: Tuple[PreNet],
              attention_mechanism,
              trainable=True, name=None, dtype=None, **kwargs):
     """Compose attention -> prenet wrapper -> concat wrapper around ``cell``.

     The attention wrapper is applied to ``cell`` first and is then wrapped
     by the prenet wrapper and the concat wrapper; the result is stored in
     ``self._cell``.
     """
     super(AttentionRNN, self).__init__(name=name, trainable=trainable, dtype=dtype, **kwargs)
     # Innermost: attention around the raw cell, keeping alignments.
     with_attention = AttentionWrapper(
         cell,
         attention_mechanism,
         output_attention=False,
         alignment_history=True)
     # Then the prenet wrapper, then the output/attention concat wrapper.
     self._cell = ConcatOutputAndAttentionWrapper(
         DecoderPreNetWrapper(with_attention, prenets))
Esempio n. 28
0
    def __graph__(self):
        """Build the encoder and the attention-based decoder parts of the graph."""

        # encoder
        encoder_outputs, encoder_state = self.encoder()

        # decoder
        with tf.variable_scope('decoder'):  # variable scope; nested names are joined with '/'
            encoder_inputs_length = self.encoder_inputs_length
            if self.beam_search:
                # With beam search the encoder results must be tile_batch-ed,
                # i.e. replicated beam_size times.
                print("use beamsearch decoding..")
                encoder_outputs = tile_batch(encoder_outputs, multiplier=self.beam_size)
                encoder_state = nest.map_structure(lambda s: tf.contrib.seq2seq.tile_batch(s, self.beam_size), encoder_state)
                encoder_inputs_length = tile_batch(encoder_inputs_length, multiplier=self.beam_size)

            # Attention mechanism over the encoder outputs.
            attention_mechanism = BahdanauAttention(num_units=self.rnn_size,
                                                    memory=encoder_outputs,
                                                    memory_sequence_length=encoder_inputs_length)
            # RNN cell for the decoder, wrapped with the attention wrapper.
            decoder_cell = self.create_rnn_cell()
            decoder_cell = AttentionWrapper(cell=decoder_cell,
                                            attention_mechanism=attention_mechanism,
                                            attention_layer_size=self.rnn_size,
                                            name='Attention_Wrapper')
            # With beam search the effective batch size is batch_size * beam_size.
            batch_size = self.batch_size if not self.beam_search else self.batch_size * self.beam_size

            # Initial decoder state: the wrapper's zero state with the encoder's
            # final state copied in as the cell state.
            decoder_initial_state = decoder_cell.zero_state(batch_size=batch_size,
                                                            dtype=tf.float32).clone(cell_state=encoder_state)

            # Fixed: a stray "9" after ``mean=0.0,`` made this call a SyntaxError
            # (positional argument after a keyword argument).
            output_layer = tf.layers.Dense(self.vocab_size, kernel_initializer=tf.truncated_normal_initializer(
                                                            mean=0.0,
                                                            stddev=0.1))

            if self.mode == 'train':
Esempio n. 29
0
 def __init__(self,
              cell,
              prenets: Tuple[PreNet],
              attention_mechanism,
              trainable=True,
              name=None,
              **kwargs):
     """Wrap ``cell`` with the prenet wrapper and then with attention.

     The attention-wrapped cell is stored in ``self._cell``.
     """
     super(AttentionRNN, self).__init__(trainable=trainable,
                                        name=name,
                                        **kwargs)
     # prenet -> attention
     prenet_cell = DecoderPreNetWrapper(cell, prenets)
     self._cell = AttentionWrapper(prenet_cell,
                                   attention_mechanism,
                                   output_attention=False,
                                   alignment_history=True)
Esempio n. 30
0
def decoding_layer(decoding_embed_inp, embeddings, encoding_op, encoding_st,
                   v_size, fr_len, en_len, max_en_len, rnn_cell_size, word2int,
                   dropout_prob, batch_size, n_layers):
    """Build the attention decoder and return training and inference logits.

    Args:
        decoding_embed_inp: embedded decoder inputs for the training decoder.
        embeddings: embedding matrix handed to the inference decoder.
        encoding_op: encoder outputs, used as the attention memory.
        encoding_st: encoder states; element 0 becomes the decoder's initial
            cell state.
        v_size: size of the output projection.
        fr_len: lengths of the attention memory sequences.
        en_len: target lengths for the training decoder.
        max_en_len: maximum target length, forwarded to both decoders.
        rnn_cell_size: cell size; also used as the attention depth and the
            attention layer size.
        word2int: mapping that must contain "TOKEN_GO" and "TOKEN_EOS".
        dropout_prob: forwarded to ``get_rnn_cell`` and used as
            ``input_keep_prob`` of the dropout wrapper.
        batch_size: batch size of the initial state and the inference decoder.
        n_layers: number of iterations of the cell-construction loop.

    Returns:
        ``(logits_tr, logits_inf)`` from the training and inference decoders.
    """
    # NOTE(review): each iteration overwrites ``decoding_cell``, so only the
    # cell built in the last iteration is used below (and n_layers == 0 leaves
    # it undefined). Stacking the layers would change the state structure
    # expected by ``clone`` below, so the behaviour is kept.
    for layer_idx in range(n_layers):
        with tf.variable_scope('decs_rnn_layer_{}'.format(layer_idx)):
            gru = get_rnn_cell(rnn_cell_size, dropout_prob)
            decoding_cell = tf.contrib.rnn.DropoutWrapper(
                gru, input_keep_prob=dropout_prob)
    out_l = Dense(v_size,
                  kernel_initializer=tf.truncated_normal_initializer(
                      mean=0.0, stddev=0.1))

    attention = BahdanauAttention(rnn_cell_size,
                                  encoding_op,
                                  fr_len,
                                  normalize=False,
                                  name='BahdanauAttention')
    # Fixed: the attention layer size referenced ``rnn_len``, which is neither
    # a parameter nor a local of this function; use the cell size that was
    # passed in.
    decoding_cell = AttentionWrapper(decoding_cell,
                                     attention,
                                     attention_layer_size=rnn_cell_size)
    attention_zero_state = decoding_cell.zero_state(batch_size, tf.float32)
    attention_zero_state = attention_zero_state.clone(
        cell_state=encoding_st[0])

    # Training and inference share variables through the reused scope.
    with tf.variable_scope("decoding_layer"):
        logits_tr = training_decoding_layer(decoding_embed_inp, en_len,
                                            decoding_cell,
                                            attention_zero_state, out_l,
                                            v_size, max_en_len)
    with tf.variable_scope("decoding_layer", reuse=True):
        logits_inf = inference_decoding_layer(embeddings, word2int["TOKEN_GO"],
                                              word2int["TOKEN_EOS"],
                                              decoding_cell,
                                              attention_zero_state, out_l,
                                              max_en_len, batch_size)

    return logits_tr, logits_inf