Example #1
# The listing starts mid-definition; a plausible head for this call
# (the layer constructor and latent_dim are assumptions):
decoder_lstm = LSTM(latent_dim,
                    return_sequences=True,
                    return_state=True,
                    dropout=0.4,
                    recurrent_dropout=0.2)
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
    dec_emb, initial_state=[state_h, state_c])

# Attention layer
attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

# Concat attention input and decoder LSTM output
decoder_concat_input = Concatenate(
    axis=-1, name='concat_layer')([decoder_outputs, attn_out])

# Dense layer
decoder_dense = TimeDistributed(Dense(y_voc, activation='softmax'))
decoder_outputs = decoder_dense(decoder_concat_input)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

model.summary()

model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)

# Wait for the training epochs to finish
history = model.fit([x_tr, y_tr[:, :-1]],
                    y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1)[:, 1:],
                    epochs=50,
                    callbacks=[es],
                    # The listing is cut off here; batch size and the
                    # x_val/y_val validation split are assumptions.
                    batch_size=128,
                    validation_data=([x_val, y_val[:, :-1]],
                                     y_val.reshape(y_val.shape[0], y_val.shape[1], 1)[:, 1:]))
Example #2
def _create_decoder(cells,
                    batch_size,
                    encoder_outputs,
                    encoder_state,
                    encoder_lengths,
                    decoding_inputs,
                    decoding_lengths,
                    embed_matrix,
                    target_vocab_size,
                    scope,
                    max_sequence_size,
                    use_attention=True,
                    softmax_temperature=None):
    """Summary

    Parameters
    ----------
    cells : TYPE
        Description
    batch_size : TYPE
        Description
    encoder_outputs : TYPE
        Description
    encoder_state : TYPE
        Description
    encoder_lengths : TYPE
        Description
    decoding_inputs : TYPE
        Description
    decoding_lengths : TYPE
        Description
    embed_matrix : TYPE
        Description
    target_vocab_size : TYPE
        Description
    scope : TYPE
        Description
    max_sequence_size : TYPE
        Description
    use_attention : bool, optional
        Description
    softmax_temperature : float32, optional
        Values above 1.0 result in more random samples

    Returns
    -------
    TYPE
        Description
    """
    from tensorflow.python.layers.core import Dense

    # Output projection
    output_layer = Dense(target_vocab_size, name='output_projection')

    # Setup Attention
    if use_attention:
        attn_mech = tf.contrib.seq2seq.LuongAttention(
            cells.output_size, encoder_outputs, encoder_lengths, scale=True)
        cells = tf.contrib.seq2seq.AttentionWrapper(
            cell=cells,
            attention_mechanism=attn_mech,
            attention_layer_size=cells.output_size,
            alignment_history=False)
        initial_state = cells.zero_state(
            dtype=tf.float32, batch_size=batch_size)
        initial_state = initial_state.clone(cell_state=encoder_state)
    else:
        # Without attention, start decoding from the raw encoder state
        # (the original snippet left initial_state undefined on this path).
        initial_state = encoder_state

    # Set up training and build the decoder
    helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=decoding_inputs,
        sequence_length=decoding_lengths,
        time_major=False)
    train_decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=cells,
        helper=helper,
        initial_state=initial_state,
        output_layer=output_layer)
    train_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        train_decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=max_sequence_size)
    train_logits = tf.identity(train_outputs.rnn_output, name='train_logits')

    # Set up inference and build the decoder
    scope.reuse_variables()
    start_tokens = tf.tile(tf.constant([GO_ID], dtype=tf.int32), [batch_size])
    helper = tf.contrib.seq2seq.SampleEmbeddingHelper(
        embedding=embed_matrix,
        start_tokens=start_tokens,
        end_token=EOS_ID,
        softmax_temperature=softmax_temperature)
    # helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
    #     embedding=embed_matrix, start_tokens=start_tokens, end_token=EOS_ID)
    infer_decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=cells,
        helper=helper,
        initial_state=initial_state,
        output_layer=output_layer)
    infer_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        infer_decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=max_sequence_size)
    infer_logits = tf.identity(infer_outputs.sample_id, name='infer_logits')

    return train_logits, infer_logits
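
A minimal call sketch for _create_decoder follows; every right-hand-side name is an assumption about the surrounding graph, not something defined in the snippet above.

# Hypothetical wiring (sketch): the encoder tensors come from an encoder
# built elsewhere, and GO_ID/EOS_ID must exist as in the snippet above.
with tf.variable_scope('decoder') as scope:
    train_logits, infer_ids = _create_decoder(
        cells=tf.contrib.rnn.LSTMCell(512),
        batch_size=batch_size,
        encoder_outputs=encoder_outputs,
        encoder_state=encoder_state,
        encoder_lengths=source_lengths,
        decoding_inputs=decoder_embedded_inputs,
        decoding_lengths=target_lengths,
        embed_matrix=embed_matrix,
        target_vocab_size=target_vocab_size,
        scope=scope,
        max_sequence_size=50)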
Example #3
    embedding_layer = build_embedding(words_class, 130)
    embedding_layer_chi = build_embedding(150, 130, name='chi')  # decoder (Chinese) word embeddings
    # LSTM inputs
    lstm_inputs = tf.nn.embedding_lookup(embedding_layer, inputs)
    # encoder layer
    encoder_layer = build_lstm(lstm_hidden_num, 0.9, 1)
    # initialize the LSTM state
    initial_state = encoder_layer.zero_state(batch_size, tf.float32)
    # unroll the LSTM dynamically over the time dimension
    encoder_lstm_outputs, encoder_final_state = tf.nn.dynamic_rnn(
        encoder_layer, lstm_inputs, initial_state=initial_state)
    # start building the decoder
    # the first token fed to the decoder is `<BOE>`
    encoder_final_c = encoder_final_state[0][0]  # final encoder state c
    # fully connected layer
    F_C_layer = Dense(fully_connect)

    #decoder_lstm_inputs = tf.nn.embedding_lookup(embedding_layer_chi, decoder_inputs)
    #decoder_layer = build_lstm(lstm_hidden_num, 0.9, 1)
    #decoder_layer = tf.contrib.rnn.LSTMCell(100)
    #decoder_lstm_outputs, decoder_final_state = tf.nn.dynamic_rnn(decoder_layer, decoder_lstm_inputs, initial_state=\
    #                                                              encoder_final_state, scope="plain_decoder")  # gotcha: the two dynamic_rnn calls must be given distinct scopes

    #decoder_lstm_outputs, decoder_final_state = inference_layer(decoder_inputs, encoder_final_state, is_inference=False)
    # build the loss function

    #decoder_logits = tf.to_float(decoder_logits, name='ToFloat32')

    # decoder construction, for inference
    def inference_layer(inputs_infer, initial_state=None, is_inference=True):
        """
Example #4
def atten_decoder_input_fn(inputs, attention):
    _input_layer = Dense(self.state_size * 2)
    return _input_layer(tf.concat([inputs, attention], 1))
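
This input-feeding function is the kind of callable that tf.contrib.seq2seq.AttentionWrapper accepts through its cell_input_fn argument (the default simply concatenates inputs and attention). A hedged wiring sketch; decoder_cell, attn_mech, and the enclosing class providing self.state_size are assumed from the elided surrounding code:

# Sketch only: project [inputs; attention] back to the cell input size
# before each decoder step.
attn_cell = tf.contrib.seq2seq.AttentionWrapper(
    cell=decoder_cell,
    attention_mechanism=attn_mech,
    attention_layer_size=self.state_size,
    cell_input_fn=atten_decoder_input_fn)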
Example #5
    def create_model(self):
        self.encoder_input = tf.placeholder(tf.int32, [None, None],
                                            name='encoder_input')
        self.encoder_input_lengths = tf.placeholder(
            tf.int32, [None], name='encoder_input_lengths')
        self.dropout_kp = tf.placeholder(tf.float32, name='dropout_kp')
        # GO
        self.decoder_input = tf.placeholder(tf.int32, [None, None],
                                            name='decoder_input')
        # EOS
        self.decoder_target = tf.placeholder(tf.int32, [None, None],
                                             name='decoder_target')
        self.decoder_input_lengths = tf.placeholder(
            tf.int32, [None], name='decoder_input_lengths')
        self.max_decoder_sequence_length = tf.reduce_max(
            self.decoder_input_lengths, name='max_decoder_sequence_length')
        self.max_encoder_sequence_length = tf.reduce_max(
            self.encoder_input_lengths, name='max_encoder_sequence_length')

        self.topic_words = tf.placeholder(tf.int32, [None, None],
                                          name='topic_words')

        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            W = tf.Variable(tf.constant(
                0., shape=[self.vocab_size, self.embedding_size]),
                            name="W")
            self.embedding_placeholder = tf.placeholder(
                tf.float32, [self.vocab_size, self.embedding_size],
                name='embedding_placeholder')
            embedding_init = W.assign(self.embedding_placeholder)
            encoder_embedded_inputs = tf.nn.embedding_lookup(
                embedding_init, self.encoder_input)
            decoder_embedded_input = tf.nn.embedding_lookup(
                embedding_init, self.decoder_input)
            topic_words_embedded = tf.nn.embedding_lookup(
                embedding_init, self.topic_words)

        with tf.variable_scope('content_encoder'):
            fw_encoder_cells = []
            for _ in range(self.num_layers):
                cell = tf.contrib.rnn.GRUCell(self.embedding_size)
                fw_encoder_wrapped_cell = tf.contrib.rnn.DropoutWrapper(
                    cell, output_keep_prob=self.dropout_kp)
                fw_encoder_cells.append(fw_encoder_wrapped_cell)

            fw_encoder_cell = tf.contrib.rnn.MultiRNNCell(fw_encoder_cells)

            bw_encoder_cells = []
            for _ in range(self.num_layers):
                cell = tf.contrib.rnn.GRUCell(self.embedding_size)
                bw_encoder_wrapped_cell = tf.contrib.rnn.DropoutWrapper(
                    cell, output_keep_prob=self.dropout_kp)
                bw_encoder_cells.append(bw_encoder_wrapped_cell)

            bw_encoder_cell = tf.contrib.rnn.MultiRNNCell(bw_encoder_cells)

            ((content_output_fw, content_output_bw),
             (content_output_state_fw, content_output_state_bw)) = \
                tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=fw_encoder_cell,
                    cell_bw=bw_encoder_cell,
                    inputs=encoder_embedded_inputs,
                    dtype=tf.float32)

            content_outputs = tf.concat([content_output_fw, content_output_bw],
                                        axis=-1)
            content_state = tf.squeeze(tf.concat(
                [content_output_state_fw, content_output_state_bw], axis=-1),
                                       axis=0)

        with tf.variable_scope('topic_encoder'):
            topic_fw_encoder_cells = []
            for _ in range(self.num_layers):
                cell = tf.contrib.rnn.GRUCell(self.embedding_size)
                topic_fw_encoder_wrapped_cell = tf.contrib.rnn.DropoutWrapper(
                    cell, output_keep_prob=self.dropout_kp)
                topic_fw_encoder_cells.append(topic_fw_encoder_wrapped_cell)

            topic_fw_encoder_cell = tf.contrib.rnn.MultiRNNCell(
                topic_fw_encoder_cells)

            topic_bw_encoder_cells = []
            for _ in range(self.num_layers):
                cell = tf.contrib.rnn.GRUCell(self.embedding_size)
                topic_bw_encoder_wrapped_cell = tf.contrib.rnn.DropoutWrapper(
                    cell, output_keep_prob=self.dropout_kp)
                topic_bw_encoder_cells.append(topic_bw_encoder_wrapped_cell)

            topic_bw_encoder_cell = tf.contrib.rnn.MultiRNNCell(
                topic_bw_encoder_cells)

            # num_topic_words = tf.tile(tf.constant([self.K], dtype=tf.int32), [tf.shape(self.topic_words)[0]])

            ((topic_output_fw, topic_output_bw),
             (topic_output_state_fw, topic_output_state_bw)) = \
                tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=topic_fw_encoder_cell,
                    cell_bw=topic_bw_encoder_cell,
                    inputs=topic_words_embedded,
                    dtype=tf.float32)

            topic_outputs = tf.concat([topic_output_fw, topic_output_bw],
                                      axis=-1)

        with tf.variable_scope("topic_summarizer"):
            topic_words_embedded_flatten = tf.reshape(
                topic_words_embedded, [-1, self.K * self.embedding_size])
            summarizer_W = tf.get_variable(
                name='summarizer_W',
                shape=[self.K * self.embedding_size, self.summarizer_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(mean=0.0,
                                                            stddev=0.1))
            summarizer_b = tf.Variable(tf.constant(
                0.1, dtype=tf.float32, shape=[self.summarizer_size]),
                                       name='summarizer_b')
            summarizer_vector = tf.tanh(
                tf.nn.xw_plus_b(topic_words_embedded_flatten, summarizer_W,
                                summarizer_b))

        with tf.variable_scope('decoder') as decoder:
            decoder_cells = []
            for _ in range(self.num_layers):
                cell = tf.contrib.rnn.GRUCell(self.embedding_size)
                decoder_wrapped_cell = tf.contrib.rnn.DropoutWrapper(
                    cell, output_keep_prob=self.dropout_kp)
                decoder_cells.append(decoder_wrapped_cell)

            decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_cells)

            output_layer = Dense(
                self.vocab_size,
                kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                   stddev=0.1),
                activation=tf.nn.sigmoid)

            state_layer = Dense(
                self.embedding_size,
                kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                   stddev=0.1))

            self.decoder_outputs_array = tensor_array_ops.TensorArray(
                dtype=tf.float32,
                size=self.max_decoder_sequence_length,
                dynamic_size=False,
                infer_shape=True)

            attention_size = 10

            def content_score_mlp(hidden_state):

                content_score_W_1 = tf.get_variable(
                    name='content_score_W_1',
                    shape=[self.embedding_size, attention_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                content_score_W_2 = tf.get_variable(
                    name='content_score_W_2',
                    shape=[2 * self.embedding_size, attention_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                content_score_W_3 = tf.get_variable(
                    name='content_score_W_3',
                    shape=[self.summarizer_size, attention_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                content_score_v = tf.get_variable(
                    name='content_score_v',
                    shape=[attention_size, 1],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                addition = tf.tanh(
                    tf.matmul(hidden_state, content_score_W_1) +
                    tf.transpose(tf.tensordot(
                        content_outputs, content_score_W_2, axes=[[2], [0]]),
                                 perm=[1, 0, 2]) +
                    tf.matmul(summarizer_vector, content_score_W_3))

                addition = tf.transpose(addition, perm=[1, 0, 2])

                weight = tf.tensordot(addition,
                                      content_score_v,
                                      axes=[[2], [0]])

                return weight

            def topic_score_mlp(hidden_state):

                topic_score_W_1 = tf.get_variable(
                    name='topic_score_W_1',
                    shape=[self.embedding_size, attention_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                topic_score_W_2 = tf.get_variable(
                    name='topic_score_W_2',
                    shape=[2 * self.embedding_size, attention_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                topic_score_W_3 = tf.get_variable(
                    name='topic_score_W_3',
                    shape=[2 * self.embedding_size, attention_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                topic_score_v = tf.get_variable(
                    name='topic_score_v',
                    shape=[attention_size, 1],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                addition = tf.tanh(
                    tf.matmul(hidden_state, topic_score_W_1) +
                    tf.transpose(tf.tensordot(
                        topic_outputs, topic_score_W_2, axes=[[2], [0]]),
                                 perm=[1, 0, 2]) +
                    tf.matmul(content_outputs[:, -1, :], topic_score_W_3))

                addition = tf.transpose(addition, perm=[1, 0, 2])

                weight = tf.tensordot(addition, topic_score_v, axes=[[2], [0]])

                return weight

            decoder_state_size = 300

            def get_overall_state(hidden_state):
                content_weights = content_score_mlp(hidden_state)
                topic_weights = topic_score_mlp(hidden_state)

                content_attention_output = tf.reduce_sum(content_outputs *
                                                         content_weights,
                                                         axis=1)
                topic_attention_output = tf.reduce_sum(topic_outputs *
                                                       topic_weights,
                                                       axis=1)

                state_W = tf.get_variable(
                    name='state_W',
                    shape=[self.embedding_size, decoder_state_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                content_attention_W = tf.get_variable(
                    name='content_attention_W',
                    shape=[2 * self.embedding_size, decoder_state_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                topic_attention_W = tf.get_variable(
                    name='topic_attention_W',
                    shape=[2 * self.embedding_size, decoder_state_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1))

                decoder_b = tf.get_variable(
                    name='decoder_b',
                    shape=[decoder_state_size],
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(0.1))

                overall_state = tf.matmul(hidden_state, state_W) + \
                                tf.matmul(content_attention_output, content_attention_W) + \
                                tf.matmul(topic_attention_output, topic_attention_W) + \
                                decoder_b

                return overall_state

            training_initial_state = state_layer(content_state)

            def training_decode(i, hidden_state, decoder_outputs_array):
                overall_state = get_overall_state(hidden_state)
                cell_outputs, states = decoder_cell(
                    decoder_embedded_input[:, i, :], (overall_state, ))
                outputs = output_layer(cell_outputs)
                decoder_outputs_array = decoder_outputs_array.write(i, outputs)
                return i + 1, states[0], decoder_outputs_array

            _, _, self.decoder_outputs_array = control_flow_ops.while_loop(
                cond=lambda i, _1, _2: i < self.max_decoder_sequence_length,
                body=training_decode,
                loop_vars=(tf.constant(0, dtype=tf.int32),
                           training_initial_state, self.decoder_outputs_array))

            training_decoder_output = tf.transpose(
                self.decoder_outputs_array.stack(), perm=[1, 0, 2])

        beam_width = 5

        with tf.variable_scope(decoder, reuse=True):

            def get_final_state(state):
                final_state = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                           size=beam_width,
                                                           dynamic_size=False,
                                                           infer_shape=True)
                state_array = tf.unstack(state.cell_state[0],
                                         num=beam_width,
                                         axis=1)

                for i in range(beam_width):
                    final_state = final_state.write(
                        i, get_overall_state(state_array[i]))
                final_state = tf.transpose(final_state.stack(), perm=[1, 0, 2])
                new_state = tf.contrib.seq2seq.BeamSearchDecoderState(
                    (final_state, ), state.log_probs, state.finished,
                    state.lengths)
                return new_state

            start_tokens = tf.tile(tf.constant([GO_ID], dtype=tf.int32),
                                   [tf.shape(content_state)[0]])

            overall_state = get_overall_state(state_layer(content_state))

            decoder_initial_state = tf.contrib.seq2seq.tile_batch(
                (overall_state, ), multiplier=beam_width)

            beam_search_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                cell=decoder_cell,
                embedding=embedding_init,
                start_tokens=start_tokens,
                end_token=EOS_ID,
                initial_state=decoder_initial_state,
                beam_width=beam_width,
                output_layer=output_layer)

            predicted_ids = tensor_array_ops.TensorArray(
                dtype=tf.int32,
                size=self.max_decoder_sequence_length,
                dynamic_size=False,
                infer_shape=True)

            parent_ids = tensor_array_ops.TensorArray(
                dtype=tf.int32,
                size=self.max_decoder_sequence_length,
                dynamic_size=False,
                infer_shape=True)

            scores = tensor_array_ops.TensorArray(
                dtype=tf.float32,
                size=self.max_decoder_sequence_length,
                dynamic_size=False,
                infer_shape=True)

            initial_finished, initial_inputs, initial_state = \
                beam_search_decoder.initialize()

            initial_final_state = get_final_state(initial_state)
            initial_sequence_lengths = array_ops.zeros_like(initial_finished,
                                                            dtype=tf.int32)

            num_decoder_output = tf.identity(self.max_decoder_sequence_length)

            def predicting_decode(i, input_data, hidden_state, predicted_ids,
                                  parent_ids, sequence_lengths, finished,
                                  scores):
                outputs, next_state, next_inputs, decoder_finished = beam_search_decoder.step(
                    i, input_data, hidden_state)

                next_finished = math_ops.logical_or(decoder_finished, finished)
                next_finished = math_ops.logical_or(
                    next_finished, i + 1 >= num_decoder_output)
                next_sequence_lengths = array_ops.where(
                    math_ops.logical_and(math_ops.logical_not(finished),
                                         next_finished),
                    array_ops.fill(array_ops.shape(sequence_lengths), i + 1),
                    sequence_lengths)

                states = get_final_state(next_state)
                predicted_ids = predicted_ids.write(i, outputs.predicted_ids)
                parent_ids = parent_ids.write(i, outputs.parent_ids)
                scores = scores.write(i, outputs.scores)
                return i + 1, next_inputs, states, predicted_ids, parent_ids, \
                       next_sequence_lengths, next_finished, scores

            (_, _next_inputs, _states, predicted_ids, parent_ids,
             sequence_lengths, finished, scores) = control_flow_ops.while_loop(
                cond=lambda i, _1, _2, _3, _4, _5, _6, _7: i < self.max_decoder_sequence_length,
                body=predicting_decode,
                loop_vars=(tf.constant(0, dtype=tf.int32), initial_inputs,
                           initial_final_state, predicted_ids, parent_ids,
                           initial_sequence_lengths, initial_finished, scores))

            predicted_ids = predicted_ids.stack()
            parent_ids = parent_ids.stack()
            scores = scores.stack()

            final_outputs_instance = tf.contrib.seq2seq.BeamSearchDecoderOutput(
                scores, predicted_ids, parent_ids)

            final_outputs, final_state = beam_search_decoder.finalize(
                final_outputs_instance, _states, sequence_lengths)

        self.training_logits = tf.identity(training_decoder_output,
                                           name='training_logits')

        self.predicting_logits = tf.identity(final_outputs.predicted_ids,
                                             name='predicting_logits')

        masks = tf.sequence_mask(self.decoder_input_lengths,
                                 self.max_decoder_sequence_length,
                                 dtype=tf.float32,
                                 name='masks')

        self.cost = tf.contrib.seq2seq.sequence_loss(self.training_logits,
                                                     self.decoder_target,
                                                     masks)

        optimizer = tf.train.AdamOptimizer(learning_rate=0.00001)
        gradients = optimizer.compute_gradients(self.cost)
        capped_gradients = [(tf.clip_by_value(grad, -5.0, 5.0), var)
                            for grad, var in gradients if grad is not None]
        self.train_op = optimizer.apply_gradients(capped_gradients)
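
Every input to this graph is a placeholder, so a training step has to feed them all explicitly. A minimal sketch, assuming a tf.Session named sess and NumPy batches with the names below (none of these names appear in the original):

# Hypothetical feed for one training step; all array names are assumptions.
feed = {
    model.encoder_input: enc_batch,
    model.encoder_input_lengths: enc_lens,
    model.decoder_input: dec_input_batch,
    model.decoder_target: dec_target_batch,
    model.decoder_input_lengths: dec_lens,
    model.topic_words: topic_batch,
    model.embedding_placeholder: pretrained_embeddings,
    model.dropout_kp: 0.8,
}
_, loss = sess.run([model.train_op, model.cost], feed_dict=feed)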
Example #6
    def build_decoder(self):
        with tf.variable_scope("decode"):
            # NOTE: only the cell from the last loop iteration is used below;
            # truly stacking num_layers cells would require a MultiRNNCell.
            for layer in range(self.num_layers):
                with tf.variable_scope('decoder_{}'.format(layer + 1)):
                    dec_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(2 * self.lstm_hidden_units)
                    dec_cell = tf.contrib.rnn.DropoutWrapper(dec_cell, input_keep_prob=self.keep_prob)

            self.output_layer = Dense(self.vocab_size)

            self.init_state = dec_cell.zero_state(self.batch_size, tf.float32)

            with tf.name_scope("training_decoder"):
                training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=self.dec_embed_input,
                                                                    sequence_length=self.target_sentence_length,
                                                                    time_major=False)

                training_decoder = basic_decoder.BasicDecoder(dec_cell,
                                                              training_helper,
                                                              initial_state=self.init_state,
                                                              latent_vector=self.z_tilda,
                                                              output_layer=self.output_layer)

                self.training_logits, _state, _len = tf.contrib.seq2seq.dynamic_decode(training_decoder,
                                                                                       output_time_major=False,
                                                                                       impute_finished=True,
                                                                                       maximum_iterations=self.num_tokens)

                self.training_logits = tf.identity(self.training_logits.rnn_output, 'logits')

            with tf.name_scope("validate_decoder"):
                start_token = self.word_index['GO']
                end_token = self.word_index['EOS']

                start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32), [self.batch_size],
                                       name='start_tokens')

                inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(self.embeddings,
                                                                            start_tokens,
                                                                            end_token)

                inference_decoder = basic_decoder.BasicDecoder(dec_cell,
                                                               inference_helper,
                                                               initial_state=self.init_state,
                                                               latent_vector=self.z_tilda,
                                                               output_layer=self.output_layer)

                self.validate_logits, _state, _len = tf.contrib.seq2seq.dynamic_decode(inference_decoder,
                                                                                        output_time_major=False,
                                                                                        impute_finished=True,
                                                                                        maximum_iterations=self.num_tokens)


                self.validate_sent = tf.identity(self.validate_logits.sample_id, name='predictions')

            with tf.name_scope("inference_decoder"):
                start_token = self.word_index['GO']
                end_token = self.word_index['EOS']

                start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32), [self.batch_size],
                                       name='start_tokens')

                inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(self.embeddings,
                                                                            start_tokens,
                                                                            end_token)

                inference_decoder = basic_decoder.BasicDecoder(dec_cell,
                                                               inference_helper,
                                                               initial_state=self.init_state,
                                                               latent_vector=self.z_sampled,
                                                               output_layer=self.output_layer)

                self.inference_logits, _state, _len = tf.contrib.seq2seq.dynamic_decode(inference_decoder,
                                                                                        output_time_major=False,
                                                                                        impute_finished=True,
                                                                                        maximum_iterations=self.num_tokens)

                self.inference_logits = tf.identity(self.inference_logits.sample_id, name='predictions')
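
The validation and inference branches emit token ids through sample_id. A hedged sketch of turning one decoded batch back into words, assuming a reverse vocabulary index_word and a prepared feed dict (neither appears in the snippet):

ids = sess.run(model.validate_sent, feed_dict=feed)  # [batch, time] token ids
eos = model.word_index['EOS']
for row in ids:
    words = [index_word[i] for i in row if i != eos]  # index_word is assumed
    print(' '.join(words))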
Example #7
                                     scope="encoder_rnn")

decoder_inputs = tf.placeholder(tf.int32,
                                shape=[None, None],
                                name="decoder_inputs")
decoder_labels = tf.placeholder(tf.int32,
                                shape=[None, None],
                                name="decoder_labels")
decoder_lengths = tf.placeholder(tf.int32,
                                 shape=[None],
                                 name="decoder_lengths")

decoder_emb = tf.nn.embedding_lookup(embedding, decoder_inputs)
helper = seq2seq.TrainingHelper(decoder_emb, decoder_lengths)

projection = Dense(embedding.shape[0], use_bias=False)

decoder_cell = GRUCell(encoder_cell.state_size)

decoder = seq2seq.BasicDecoder(decoder_cell,
                               helper,
                               encoder_state,
                               output_layer=projection)

decoder_outputs, _, _ = seq2seq.dynamic_decode(decoder, scope="decoder")
decoder_outputs = decoder_outputs.rnn_output

question_mask = tf.sequence_mask(decoder_lengths, dtype=tf.float32)
question_loss = seq2seq.sequence_loss(logits=decoder_outputs,
                                      targets=decoder_labels,
                                      weights=question_mask)

def attn_decoder_input_fn(inputs, attention):
    _input_layer = Dense(num_nodes,
                         name='attn_input_feeding',
                         dtype=tf.float32)
    return _input_layer(array_ops.concat([inputs, attention], -1))
    def _model(self, features, labels, mode, params):
        """
            main model.
        """
        question_sequence = features['question_seq']
        answer_sequence = features['answer_seq']

        batch_size = tf.shape(question_sequence)[0]
        start_token = tf.ones([1], tf.int32)

        model_size = params["model_size"]
        num_layers = params["num_layers"]
        keep_prob = params["keep_prob"]
        vocab_size = params["vocab_size"]
        embedding_size = params["embedding_size"]

        question_lengths = tf.reduce_sum(
            tf.to_int32(tf.not_equal(question_sequence, self.vocabs["<PAD>"])),
            1)
        answer_lengths = tf.reduce_sum(
            tf.to_int32(tf.not_equal(answer_sequence, self.vocabs["<PAD>"])),
            1)

        question_embed = layers.embed_sequence(question_sequence,
                                               vocab_size=vocab_size,
                                               embed_dim=embedding_size,
                                               scope='embed')
        answer_embed = layers.embed_sequence(answer_sequence,
                                             vocab_size=vocab_size,
                                             embed_dim=embedding_size,
                                             scope='embed',
                                             reuse=True)
        with tf.variable_scope('embed', reuse=True):
            embeddings = tf.get_variable('embeddings')
        fcells = []
        for i in range(num_layers):
            c = tf.nn.rnn_cell.GRUCell(model_size)
            c = tf.nn.rnn_cell.DropoutWrapper(c,
                                              input_keep_prob=keep_prob,
                                              output_keep_prob=keep_prob)
            fcells.append(c)
        # I can't figure out how to use the tuple version.
        fcell = tf.nn.rnn_cell.MultiRNNCell(fcells)

        #bcells = []
        #for i in range(num_layers):
        #    c = tf.nn.rnn_cell.GRUCell(model_size)
        #    c = tf.nn.rnn_cell.DropoutWrapper(c, input_keep_prob=keep_prob,
        #                                    output_keep_prob=keep_prob)
        #    bcells.append(c)
        # I can't figure out how to use the tuple version.
        #bcell = tf.nn.rnn_cell.MultiRNNCell(bcells)

        bcell = tf.contrib.rnn.GRUCell(num_units=model_size)

        #icell = tf.contrib.rnn.GRUCell(num_units=model_size)
        encoder_outputs, encoder_final_state = tf.nn.bidirectional_dynamic_rnn(
            fcell,
            bcell,
            question_embed,
            sequence_length=question_lengths,
            dtype=tf.float32)

        # helpers
        train_helper = tf.contrib.seq2seq.TrainingHelper(answer_embed,
                                                         answer_lengths,
                                                         time_major=False)
        start_tokens = tf.tile(tf.constant([self.vocabs['<START>']],
                                           dtype=tf.int32), [batch_size],
                               name='start_tokens')
        pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embeddings,
            start_tokens=start_tokens,
            end_token=self.vocabs["<EOS>"])

        # rnn cell and dense layer
        cell = tf.contrib.rnn.GRUCell(num_units=model_size)
        cells = []
        for i in range(num_layers):
            c = tf.nn.rnn_cell.GRUCell(model_size)
            c = tf.nn.rnn_cell.DropoutWrapper(c,
                                              input_keep_prob=keep_prob,
                                              output_keep_prob=keep_prob)
            cells.append(c)
        # I can't figure out how to use the tuple version.
        cell = tf.nn.rnn_cell.MultiRNNCell(cells)
        projection_layer = Dense(
            units=vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))

        # Decoder for the seq2seq model; it attends over the encoder outputs above.
        def decode(helper, scope, output_max_length, reuse=None):
            with tf.variable_scope(scope, reuse=reuse):
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    num_units=model_size,
                    memory=encoder_outputs[0],
                    memory_sequence_length=question_lengths)
                #cell = tf.contrib.rnn.GRUCell(num_units=model_size)
                attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                    cell, attention_mechanism, attention_layer_size=model_size)
                #out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                #    attn_cell, vocab_size, reuse=reuse
                #)
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=attn_cell,
                    helper=helper,
                    initial_state=attn_cell.zero_state(dtype=tf.float32,
                                                       batch_size=batch_size),
                    #initial_state=encoder_final_state,
                    output_layer=projection_layer)
                outputs = tf.contrib.seq2seq.dynamic_decode(
                    decoder=decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=output_max_length)
            return outputs[0]

        train_outputs = decode(train_helper, 'decode', 3000)
        pred_outputs = decode(pred_helper, 'decode', 300, reuse=True)

        targets = answer_sequence[:, 1:]

        probs = tf.nn.softmax(pred_outputs.rnn_output, name="probs")
        # in case in prediction mode return
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions={
                                                  "probs": probs,
                                                  "syms":
                                                  pred_outputs.sample_id
                                              })

        # mask the PADs
        mask = tf.to_float(
            tf.not_equal(answer_sequence[:, :-1], self.vocabs["<PAD>"]))

        #tf.identity(mask[0], name='mask')
        #tf.identity(targets[0], name='targets')
        #tf.identity(train_outputs.rnn_output[0,output_lengths[0]-2:output_lengths[0],:], name='rnn_out')
        # Loss function
        loss = tf.contrib.seq2seq.sequence_loss(
            train_outputs.rnn_output[:, :-1, :], targets, mask)
        tf.summary.scalar("loss", loss)

        # Optimizer
        learning_rate = tf.Variable(0.0, trainable=False)
        initial_learning_rate = tf.constant(0.001)
        learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                                   tf.train.get_global_step(),
                                                   100, 0.99)
        tf.summary.scalar("learning_rate", learning_rate)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 5.0)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        # Visualise gradients
        vis_grads = [0 if i is None else i for i in grads]
        for g in vis_grads:
            tf.summary.histogram("gradients_" + str(g), g)
        train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step=tf.train.get_global_step())
        tf.identity(question_sequence[0], name="train_input")
        tf.identity(train_outputs.sample_id[0], name='train_pred')
        tf.identity(pred_outputs.sample_id[0], name='predictions')
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=None,
                                          loss=loss,
                                          train_op=train_op)
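
Because _model follows the Estimator model_fn signature (features, labels, mode, params), it can be handed to tf.estimator.Estimator directly. A sketch under assumed hyperparameter values and an assumed train_input_fn:

# Sketch only; the hyperparameter values and train_input_fn are assumptions.
estimator = tf.estimator.Estimator(
    model_fn=model._model,  # bound method of the class that holds self.vocabs
    params={"model_size": 256, "num_layers": 2, "keep_prob": 0.8,
            "vocab_size": 20000, "embedding_size": 128})
estimator.train(input_fn=train_input_fn, steps=10000)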
Example #10
    def __init__(self,
                 encoder_vocab_size,
                 decoder_vocab_size,
                 rnn_size=128,
                 layer_size=2,
                 embedding_dim=200,
                 grad_clip=5,
                 is_inference=False):
        # (required parameters moved ahead of the defaulted ones; the
        # original ordering was a SyntaxError)
        # define inputs
        self.input_x = tf.placeholder(tf.int32,
                                      shape=[None, None],
                                      name='input_ids')

        # define embedding layer
        with tf.variable_scope('embedding'):
            encoder_embedding = tf.Variable(tf.truncated_normal(
                shape=[encoder_vocab_size, embedding_dim], stddev=0.1),
                                            name='encoder_embedding')
            decoder_embedding = tf.Variable(tf.truncated_normal(
                shape=[decoder_vocab_size, embedding_dim], stddev=0.1),
                                            name='decoder_embedding')

        # define encoder
        with tf.variable_scope('encoder'):
            encoder = self._get_simple_lstm(rnn_size, layer_size)

        with tf.device('/cpu:0'):
            input_x_embedded = tf.nn.embedding_lookup(encoder_embedding,
                                                      self.input_x)

        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder,
                                                           input_x_embedded,
                                                           dtype=tf.float32)

        # define helper for decoder
        if is_inference:
            self.start_tokens = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name='start_tokens')
            self.end_token = tf.placeholder(tf.int32, name='end_token')
            helper = GreedyEmbeddingHelper(decoder_embedding,
                                           self.start_tokens, self.end_token)
        else:
            self.target_ids = tf.placeholder(tf.int32,
                                             shape=[None, None],
                                             name='target_ids')
            self.decoder_seq_length = tf.placeholder(tf.int32,
                                                     shape=[None],
                                                     name='batch_seq_length')
            with tf.device('/cpu:0'):
                target_embeddeds = tf.nn.embedding_lookup(
                    decoder_embedding, self.target_ids)
            helper = TrainingHelper(target_embeddeds, self.decoder_seq_length)

        with tf.variable_scope('decoder'):
            fc_layer = Dense(decoder_vocab_size)
            decoder_cell = self._get_simple_lstm(rnn_size, layer_size)
            decoder = BasicDecoder(decoder_cell, helper, encoder_state,
                                   fc_layer)

        logits, final_state, final_sequence_lengths = dynamic_decode(decoder)

        if not is_inference:
            targets = tf.reshape(self.target_ids, [-1])
            logits_flat = tf.reshape(logits.rnn_output,
                                     [-1, decoder_vocab_size])
            print('shape logits_flat:{}'.format(logits_flat.shape))
            print('shape logits:{}'.format(logits.rnn_output.shape))

            self.cost = tf.losses.sparse_softmax_cross_entropy(
                targets, logits_flat)

            # define train op
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                              grad_clip)

            optimizer = tf.train.AdamOptimizer(1e-3)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        else:
            # softmax over the decoder's rnn_output (logits itself is a
            # BasicDecoderOutput namedtuple, not a tensor)
            self.prob = tf.nn.softmax(logits.rnn_output)
            tf.contrib.rnn.LSTMStateTuple(c=encoder_last_state_c,
                                          h=encoder_last_state_h))
    encoder_last_state = tuple(encoder_last_state)

    #batch_size = batch_size * beam_width
    ######################################################### ends building encoder
    # building training decoder, no beam search
    with tf.variable_scope('shared_attention_mechanism'):
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units=hidden_dim * 2,
            memory=encoder_outputs,
            memory_sequence_length=encoder_inputs_length)
    global_decoder_cell = tf.contrib.rnn.MultiRNNCell([
        tf.nn.rnn_cell.BasicLSTMCell(hidden_dim * 2) for _ in range(num_layers)
    ])
    projection_layer = Dense(label_dim)

    decoder_cell = seq2seq.AttentionWrapper(
        cell=global_decoder_cell,
        #tf.nn.rnn_cell.BasicLSTMCell(hidden_dim*2),
        attention_mechanism=attention_mechanism,
        attention_layer_size=hidden_dim * 2)
    #input_vectors = tf.nn.embedding_lookup(tgt_w, decoder_inputs)
    print(decoder_inputs.shape, decoder_inputs_train.shape)
    #decoder training
    training_helper = seq2seq.TrainingHelper(
        inputs=decoder_inputs_train,
        sequence_length=tf.tile(tf.constant([15], dtype=tf.int32),
                                [batch_size]),  #decoder_inputs_length_train,
        time_major=False)
    #print(decoder_cell.zero_state(batch_size,tf.float32))
Example #12
	def __init__(self, data, args, embed):

		with tf.variable_scope("input"):
			with tf.variable_scope("embedding"):
				# build the embedding table and embedding input
				if embed is None:
					# initialize the embedding randomly
					self.embed = tf.get_variable('embed', [data.vocab_size, args.embedding_size], tf.float32)
				else:
					# initialize the embedding by pre-trained word vectors
					self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)

			self.sentence = tf.placeholder(tf.int32, (None, None), 'sen_inps')  # batch*len
			self.sentence_length = tf.placeholder(tf.int32, (None,), 'sen_lens')  # batch
			self.use_prior = tf.placeholder(dtype=tf.bool, name="use_prior")

			batch_size, batch_len = tf.shape(self.sentence)[0], tf.shape(self.sentence)[1]
			self.decoder_max_len = batch_len - 1

			self.encoder_input = tf.nn.embedding_lookup(self.embed, self.sentence)  # batch*len*unit
			self.encoder_len = self.sentence_length

			decoder_input = tf.split(self.sentence, [self.decoder_max_len, 1], 1)[0]  # no eos_id
			self.decoder_input = tf.nn.embedding_lookup(self.embed, decoder_input)  # batch*(len-1)*unit
			self.decoder_target = tf.split(self.sentence, [1, self.decoder_max_len], 1)[1]  # no go_id, batch*(len-1)
			self.decoder_len = self.sentence_length - 1
			self.decoder_mask = tf.sequence_mask(self.decoder_len, self.decoder_max_len, dtype=tf.float32)  # batch*(len-1)

		# initialize the training process
		self.learning_rate = tf.Variable(float(args.lr), trainable=False, dtype=tf.float32)
		self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * args.lr_decay)
		self.global_step = tf.Variable(0, trainable=False)

		# build rnn_cell
		cell_enc = tf.nn.rnn_cell.GRUCell(args.eh_size)
		cell_dec = tf.nn.rnn_cell.GRUCell(args.dh_size)

		# build encoder
		with tf.variable_scope('encoder'):
			encoder_output, encoder_state = dynamic_rnn(cell_enc, self.encoder_input,
														self.encoder_len, dtype=tf.float32, scope="encoder_rnn")

		with tf.variable_scope('recognition_net'):
			recog_input = encoder_state
			self.recog_mu = tf.layers.dense(inputs=recog_input, units=args.z_dim, activation=None, name='recog_mu')
			self.recog_logvar = tf.layers.dense(inputs=recog_input, units=args.z_dim, activation=None, name='recog_logvar')

			epsilon = tf.random_normal(tf.shape(self.recog_logvar), name="epsilon")
			std = tf.exp(0.5 * self.recog_logvar)
			self.recog_z = tf.add(self.recog_mu, tf.multiply(std, epsilon), name='recog_z')

			self.kld = tf.reduce_mean(
				0.5 * tf.reduce_sum(tf.exp(self.recog_logvar) + self.recog_mu * self.recog_mu - self.recog_logvar - 1,
									axis=-1))
			self.prior_z = tf.random_normal(tf.shape(self.recog_logvar), name="prior_z")
			latent_sample = tf.cond(self.use_prior,
									lambda: self.prior_z,
									lambda: self.recog_z,
									name='latent_sample')
			dec_init_state = tf.layers.dense(inputs=latent_sample, units=args.dh_size, activation=None)

		with tf.variable_scope("output_layer", initializer=tf.orthogonal_initializer()):
			self.output_layer = Dense(data.vocab_size, kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
									  use_bias=True)

		with tf.variable_scope("decode", initializer=tf.orthogonal_initializer()):
			train_helper = tf.contrib.seq2seq.TrainingHelper(
				inputs=self.decoder_input,
				sequence_length=self.decoder_len
			)
			train_decoder = tf.contrib.seq2seq.BasicDecoder(
				cell=cell_dec,
				helper=train_helper,
				initial_state=dec_init_state,
				output_layer=self.output_layer
			)
			train_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
				decoder=train_decoder,
				maximum_iterations=self.decoder_max_len,
				impute_finished=True
			)
			logits = train_output.rnn_output

			crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
				labels=self.decoder_target, logits=logits)
			crossent = tf.reduce_sum(crossent * self.decoder_mask)
			self.sen_loss = crossent / tf.to_float(batch_size)
			self.ppl_loss = crossent / tf.reduce_sum(self.decoder_mask)

			self.decoder_distribution_teacher = tf.nn.log_softmax(logits)

		with tf.variable_scope("decode", reuse=True):
			infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(self.embed, tf.fill([batch_size], data.go_id),
																	data.eos_id)
			infer_decoder = tf.contrib.seq2seq.BasicDecoder(
				cell=cell_dec,
				helper=infer_helper,
				initial_state=dec_init_state,
				output_layer=self.output_layer
			)
			infer_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
				decoder=infer_decoder,
				maximum_iterations=self.decoder_max_len,
				impute_finished=True
			)
			self.decoder_distribution = infer_output.rnn_output
			self.generation_index = tf.argmax(tf.split(self.decoder_distribution,
													   [2, data.vocab_size - 2], 2)[1], 2) + 2  # for removing UNK

		self.kl_weights = tf.minimum(tf.to_float(self.global_step) / args.full_kl_step, 1.0)
		self.kl_loss = self.kl_weights * tf.maximum(self.kld, args.min_kl)
		self.loss = self.sen_loss + self.kl_loss

		# calculate the gradient of parameters and update
		self.params = [k for k in tf.trainable_variables() if args.name in k.name]
		opt = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=args.momentum)
		gradients = tf.gradients(self.loss, self.params)
		clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients,
				args.grad_clip)
		self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
				global_step=self.global_step)

		# save checkpoint
		self.latest_saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
				max_to_keep=args.checkpoint_max_to_keep, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
		self.best_saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
				max_to_keep=1, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)

		# create summary for tensorboard
		self.create_summary(args)
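
The KL term above is annealed linearly, kl_weights = min(global_step / full_kl_step, 1.0), a standard trick to keep the VAE posterior from collapsing early in training. A standalone illustration of the schedule:

# Plain-Python illustration of the KL annealing used in the graph above.
def kl_weight(global_step, full_kl_step):
    # linear warm-up from 0 to 1, then constant
    return min(global_step / float(full_kl_step), 1.0)

assert kl_weight(5000, 10000) == 0.5
assert kl_weight(20000, 10000) == 1.0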
Example #13
    def __init__(self,
                 n_cond,
                 n_pred,
                 hidden_dim,
                 n_layers=2,
                 input_dim=1,
                 learning_rate=0.001,
                 output_dim=1,
                 cell_type='GRU',
                 batch_size=100,
                 optimizer='Adam',
                 teacher_forcing_ratio=0.5,
                 use_scheduled_sampling=True):
        """
        Construct graph
        TrainingHelper just iterates over the dec_inputs passed to it
        But in general a helper will take sample ids passed by basic decoder and used these to pick inputs
        BasicDecoder just implements a step function which produces outputs and sample ids at each step
            the outputs are the result of applying the rnn cell followed by an optional output layer

        what I need is a version of GreedyEmbeddingHelper,
            (A helper for use during inference.
             Uses the argmax of the output (treated as logits) and passes the
             result through an embedding layer to get the next input.)


        """
        super().__init__(n_cond,
                         n_pred,
                         teacher_forcing_ratio=teacher_forcing_ratio,
                         use_scheduled_sampling=use_scheduled_sampling)
        self.graph = tf.Graph()
        with self.graph.as_default():
            if use_scheduled_sampling:
                self.sampling_probability = tf.placeholder(
                    tf.float32, shape=()
                )  # the probability of sampling from the outputs instead of reading directly from the inputs
            self.teacher_force = tf.placeholder(tf.bool)
            cells = []
            self.keep_prob = tf.placeholder(tf.float32)
            for i in range(n_layers):
                with tf.variable_scope('RNN_{}'.format(i)):
                    if cell_type == 'GRU':
                        cells.append(
                            DropoutWrapper(tf.nn.rnn_cell.GRUCell(hidden_dim),
                                           output_keep_prob=self.keep_prob))
                    elif cell_type == 'LSTM':
                        cells.append(
                            DropoutWrapper(
                                tf.nn.rnn_cell.BasicLSTMCell(hidden_dim),
                                output_keep_prob=self.keep_prob))
                    # cells.append(tf.nn.rnn_cell.BasicLSTMCell(...))
            cell = tf.nn.rnn_cell.MultiRNNCell(cells)

            self.inputs = tf.placeholder(tf.float32,
                                         shape=(None, n_cond, input_dim))
            self.go_sym = tf.placeholder(tf.float32,
                                         shape=(None, 1, input_dim))
            self.targets = tf.placeholder(tf.float32,
                                          shape=(None, n_pred, input_dim))

            dec_input = tf.concat([self.go_sym, self.targets[:, :-1, :]], 1)
            enc_outputs, enc_state = tf.nn.dynamic_rnn(
                cell, self.inputs, dtype=tf.float32)  # returns outputs, state

            # one of the features of the dynamic seq2seq is that it can handle variable length sequences
            # but to do this you need to pad them to equal length then specify the lengths separately
            # with constant lengths we still need to specify the lengths for traininghelper, but n.b. they're all the same
            sequence_lengths = tf.constant(n_pred, shape=(batch_size, ))

            output_layer = Dense(1, activation=None)

            # if not use_scheduled_sampling:
            #     train_helper = tf.contrib.seq2seq.TrainingHelper(dec_input, sequence_lengths)
            # else:
            train_helper = tf.contrib.seq2seq.ScheduledOutputTrainingHelper(
                dec_input,
                sequence_lengths,
                self.sampling_probability,
                next_input_layer=Dense(1, activation=None))

            def sampler(time, outputs, state):
                # this isn't necessary, but just do it to get the types right
                sample_ids = math_ops.cast(math_ops.argmax(outputs, axis=-1),
                                           tf.int32)
                return sample_ids

            def looper(time, outputs, state, sample_ids):
                # next_inputs_fn: callable that takes `(time, outputs, state, sample_ids)`
                # and emits `(finished, next_inputs, next_state)`.
                next_time = time + 1
                finished = next_time >= sequence_lengths
                next_inputs = tf.reshape(
                    outputs, (batch_size, input_dim))  # collapse the time axis
                # Based on the shape of the outputs of TrainingHelper.initialize,
                # feeding the reshaped outputs back in as the next inputs is the
                # right thing to do (the original returned outputs unreshaped).
                return (finished, next_inputs, state)

            inf_helper = tf.contrib.seq2seq.CustomHelper(
                lambda: (array_ops.tile([False], [batch_size]),
                         tf.reshape(self.go_sym, (batch_size, input_dim))),
                sampler, looper)  # initialize fn, sample fn, next_inputs fn

            # CustomHelper takes three callables:
            #   initialize_fn: returns `(finished, next_inputs)` for the first step
            #   sample_fn: takes `(time, outputs, state)` and returns sample ids
            #   next_inputs_fn: see the note on `looper` above
            # https://github.com/tensorflow/tensorflow/issues/11540

            train_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell,
                helper=train_helper,
                initial_state=enc_state,
                output_layer=output_layer)

            inf_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell,
                helper=inf_helper,
                initial_state=enc_state,
                output_layer=output_layer)

            outputs, states, sequence_lengths = tf.cond(
                self.teacher_force, lambda: tf.contrib.seq2seq.dynamic_decode(
                    decoder=train_decoder),
                lambda: tf.contrib.seq2seq.dynamic_decode(decoder=inf_decoder))
            # outputs, states, sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=train_decoder)
            # here outputs is an instance of class BasicDecoderOutput, with attrs rnn_output, sample_ids

            self.preds = outputs.rnn_output
            self.loss = tf.reduce_mean(tf.abs(self.preds - self.targets))
            if optimizer == 'Adam':
                self.optimizer = tf.train.AdamOptimizer(learning_rate)
            elif optimizer == 'RMSProp':
                self.optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                           decay=0.92,
                                                           momentum=0.5)

            self.train_op = self.optimizer.minimize(self.loss)

            tf.summary.scalar('loss', self.loss)

            self.summary_op = tf.summary.merge_all()
            self.saver = tf.train.Saver()
            self.init = tf.global_variables_initializer()
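# A minimal usage sketch for the graph defined above. The attribute names
# (inputs, go_sym, targets, keep_prob, teacher_force, sampling_probability,
# train_op, loss) come from the snippet itself; the arrays `x`, `go`, and `y`
# are hypothetical, and the whole block is an assumption, not source code.
#
# with tf.Session(graph=model.graph) as sess:
#     sess.run(model.init)
#     _, loss = sess.run(
#         [model.train_op, model.loss],
#         feed_dict={model.inputs: x,        # [batch_size, n_cond, input_dim]
#                    model.go_sym: go,       # [batch_size, 1, input_dim]
#                    model.targets: y,       # [batch_size, n_pred, input_dim]
#                    model.keep_prob: 0.8,
#                    model.teacher_force: True,
#                    model.sampling_probability: 0.25})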
Example #14
    def _init_decoder(self):
        data_y = process_decoding_input(self.data_y, self.vocab_to_int_y,
                                        self.batch_size)

        self.dec_embeddings = tf.Variable(tf.random_uniform(
            [self.vocab_size_y, self.embedding_size], -1.0, 1.0),
                                          dtype=tf.float32)

        dec_embedded = tf.nn.embedding_lookup(self.dec_embeddings, data_y)

        with tf.variable_scope("decoder"):
            dec_cell = rnn_cell(self.cell_size, self.dec_num_layers,
                                self.dec_keep_prob)

        out_layer = Dense(self.vocab_size_y,
                          kernel_initializer=tf.truncated_normal_initializer(
                              mean=0.0, stddev=0.1))

        att_mechanism = seq2seq.BahdanauAttention(self.cell_size,
                                                  self.enc_outputs,
                                                  self.x_length,
                                                  normalize=False)

        dec_cell = seq2seq.DynamicAttentionWrapper(
            dec_cell, att_mechanism, attention_size=self.cell_size)

        init_state = seq2seq.DynamicAttentionWrapperState(
            cell_state=self.enc_states[0],
            attention=_zero_state_tensors(self.cell_size, self.batch_size,
                                          tf.float32))

        with tf.variable_scope("decoding"):
            train_helper = seq2seq.TrainingHelper(
                dec_embedded, sequence_length=self.y_length, time_major=False)

            train_decoder = seq2seq.BasicDecoder(dec_cell, train_helper,
                                                 init_state, out_layer)

            train_out, _ = seq2seq.dynamic_decode(
                train_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_length,
                swap_memory=True)

            self.decoder_train = train_out.rnn_output

        with tf.variable_scope("decoding", reuse=True):
            start_tokens = tf.tile(
                tf.constant([self.vocab_to_int_y[START]], dtype=tf.int32),
                [self.batch_size])

            infer_helper = seq2seq.GreedyEmbeddingHelper(
                embedding=self.dec_embeddings,
                start_tokens=start_tokens,
                end_token=self.vocab_to_int_y[STOP])

            infer_decoder = seq2seq.BasicDecoder(dec_cell, infer_helper,
                                                 init_state, out_layer)

            infer_out, _ = seq2seq.dynamic_decode(
                infer_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_length)

            self.decoder_inference = infer_out.sample_id

        tf.identity(self.decoder_train, 'decoder_train')
        tf.identity(self.decoder_inference, 'decoder_inference')
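# `process_decoding_input` is called above but not shown. A common
# implementation, consistent with how it is used (an assumed sketch, not the
# source), drops the last token of every target sequence and prepends the GO
# token looked up via `vocab_to_int_y[START]`, the same constant the example
# already uses:
import tensorflow as tf

def process_decoding_input(data_y, vocab_to_int_y, batch_size):
    # strip the final token from each sequence in the batch
    ending = tf.strided_slice(data_y, [0, 0], [batch_size, -1], [1, 1])
    # prepend the GO id so the decoder sees <GO>, y_1, ..., y_{n-1}
    return tf.concat(
        [tf.fill([batch_size, 1], vocab_to_int_y[START]), ending], 1)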
Example #15
    def build_graph(self):
        with tf.variable_scope('input'):
            self.inputs = tf.placeholder(tf.int32, [None, None], name='inputs')
            self.targets = tf.placeholder(tf.int32, [None, None],
                                          name='targets')
            self.learning_rate = tf.placeholder(tf.float32,
                                                name='learning_rate')
            self.target_sequence_length = tf.placeholder(
                tf.int32, (None, ), name='target_sequence_length')
            self.max_target_sequence_length = tf.reduce_max(
                self.target_sequence_length, name='max_target_length')
            self.source_sequence_length = tf.placeholder(
                tf.int32, (None, ), name='source_sequence_length')

        with tf.variable_scope('encoder'):
            encoder_embed_input = tf.contrib.layers.embed_sequence(
                self.inputs, len(self.source_letter_to_int),
                self.config.encoding_embedding_size)
            encoder_cell = tf.contrib.rnn.MultiRNNCell([
                self.get_lstm_cell(self.config.rnn_size)
                for _ in range(self.config.rnn_layers)
            ])
            encoder_output, encoder_state = tf.nn.dynamic_rnn(
                encoder_cell,
                encoder_embed_input,
                sequence_length=self.source_sequence_length,
                dtype=tf.float32)

        with tf.variable_scope('decoder'):
            # 1. embedding
            decoder_input = self.process_decoder_input(
                self.targets, self.target_letter_to_int,
                self.config.batch_size)
            target_vocab_size = len(self.target_letter_to_int)
            decoder_embeddings = tf.Variable(
                tf.random_uniform(
                    [target_vocab_size, self.config.decoding_embedding_size]))
            decoder_embed_input = tf.nn.embedding_lookup(
                decoder_embeddings, decoder_input)
            # decoder_embed_input = tf.contrib.layers.embed_sequence(decoder_input, target_vocab_size, self.config.decoding_embedding_size)
            # 2. construct the rnn
            decoder_cell = tf.contrib.rnn.MultiRNNCell([
                self.get_lstm_cell(self.config.rnn_size)
                for _ in range(self.config.rnn_layers)
            ])
            # 3. output fully connected
            output_layer = Dense(
                target_vocab_size,
                kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                   stddev=0.1))
            if self.mode == 'train':
                training_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=decoder_embed_input,
                    sequence_length=self.target_sequence_length,
                    time_major=False)
                training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    decoder_cell, training_helper, encoder_state, output_layer)
                decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    training_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_sequence_length)
            else:
                start_tokens = tf.tile(
                    tf.constant([self.target_letter_to_int['<GO>']],
                                dtype=tf.int32), [self.config.batch_size],
                    name='start_tokens')
                predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    decoder_embeddings, start_tokens,
                    self.target_letter_to_int['<EOS>'])
                predicting_decoder = tf.contrib.seq2seq.BasicDecoder(
                    decoder_cell, predicting_helper, encoder_state,
                    output_layer)
                decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    predicting_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_sequence_length)

        with tf.variable_scope('loss'):
            training_logits = tf.identity(decoder_output.rnn_output, 'logits')
            predicting_ids = tf.identity(
                decoder_output.sample_id,
                name='predictions')  # sample ids (not logits), used at predict time
            masks = tf.sequence_mask(self.target_sequence_length,
                                     self.max_target_sequence_length,
                                     dtype=tf.float32,
                                     name='masks')
            self.loss = tf.contrib.seq2seq.sequence_loss(
                training_logits, self.targets, masks)

        with tf.name_scope('optimize'):
            # optimizer = tf.train.AdamOptimizer(lr)
            # gradients = optimizer.compute_gradients(cost)
            # capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
            # train_op = optimizer.apply_gradients(capped_gradients)
            training_variables = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(
                tf.gradients(self.loss, training_variables), 5)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            self.train_op = optimizer.apply_gradients(zip(
                grads, training_variables),
                                                      name='train_op')
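# `self.get_lstm_cell` is referenced above but not defined in the snippet; a
# minimal sketch consistent with its call sites (an assumption, not the
# source):
def get_lstm_cell(self, rnn_size):
    return tf.contrib.rnn.LSTMCell(
        rnn_size, initializer=tf.random_uniform_initializer(-0.1, 0.1))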
Example #16
    def _build_model_op(self):
        # self attention
        if self.unimodal:
            inputs = self.input
        else:
            if self.attn_fusion:
                inputs = self.self_attention(self.a_input, self.v_input,
                                             self.t_input, '')
                inputs = inputs * tf.expand_dims(self.mask, axis=-1)
            else:
                inputs = tf.concat([self.a_input, self.v_input, self.t_input],
                                   axis=-1)

        # inputs = tf.nn.dropout(inputs, 1 - self.lstm_inp_dropout)
        self.gru_output = self.BiGRU(inputs, 100, 'gru', 1 - self.lstm_dropout)
        self.inter = tf.nn.dropout(self.gru_output, 1 - self.dropout_lstm_out)
        # self.inter = self.gru_output
        if self.attn_2:
            self.inter = self.self_attention_2(self.inter, '')
        init = tf.glorot_uniform_initializer(seed=self.seed, dtype=tf.float32)
        if self.unimodal:
            self.inter1 = Dense(
                100,
                activation=tf.nn.tanh,
                kernel_initializer=init,
                kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001))(
                    self.inter)
        else:
            self.inter1 = Dense(
                200,
                activation=tf.nn.relu,
                kernel_initializer=init,
                kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001))(
                    self.inter)
            self.inter1 = self.inter1 * tf.expand_dims(self.mask, axis=-1)
            self.inter1 = Dense(
                200,
                activation=tf.nn.relu,
                kernel_initializer=init,
                kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001))(
                    self.inter1)
            self.inter1 = self.inter1 * tf.expand_dims(self.mask, axis=-1)
            self.inter1 = Dense(
                200,
                activation=tf.nn.relu,
                kernel_initializer=init,
                kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001))(
                    self.inter1)
        self.inter1 = self.inter1 * tf.expand_dims(self.mask, axis=-1)
        self.inter1 = tf.nn.dropout(self.inter1, 1 - self.dropout)
        self.output = Dense(
            self.emotions,
            kernel_initializer=init,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001))(
                self.inter1)
        # print('self.output', self.output.get_shape())
        self.preds = tf.nn.softmax(self.output)
        # To calculate the number correct, we want to count padded steps as incorrect
        correct = tf.cast(
            tf.equal(tf.argmax(self.preds, -1, output_type=tf.int32),
                     tf.argmax(self.y, -1, output_type=tf.int32)),
            tf.int32) * tf.cast(self.mask, tf.int32)

        # To calculate accuracy we want to divide by the number of non-padded time-steps,
        # rather than taking the mean
        self.accuracy = tf.reduce_sum(tf.cast(
            correct, tf.float32)) / tf.reduce_sum(
                tf.cast(self.seq_len, tf.float32))
        # y = tf.argmax(self.y, -1)

        loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.output,
                                                          labels=self.y)
        loss = loss * self.mask

        self.loss = tf.reduce_sum(loss) / tf.reduce_sum(self.mask)
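# `self.BiGRU` is used above but not shown. A plausible sketch matching the
# call signature BiGRU(inputs, units, name, keep_rate) (an assumption, not the
# source):
def BiGRU(self, x, units, name, keep_rate):
    with tf.variable_scope(name):
        fw = tf.nn.rnn_cell.GRUCell(units)
        bw = tf.nn.rnn_cell.GRUCell(units)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            fw, bw, x, dtype=tf.float32)
        # concatenate forward and backward outputs: [batch, time, 2 * units]
        return tf.nn.dropout(tf.concat(outputs, axis=-1), keep_rate)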
Example #17
    def build_latent_space(self):
        with tf.name_scope("latent_space"):
            # z_tilda: [batch_size, latent_dim]
            self.z_tilda = Dense(self.latent_dim, name='z_tilda')(self.h_N)
    def build_decoder(self):
        print("building decoder and attention..")
        with tf.variable_scope('decoder'):
            # Building decoder_cell and decoder_initial_state
            self.decoder_cell, self.decoder_initial_state = self.build_decoder_cell(
            )

            # Initialize decoder embeddings to have variance=1.
            sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
            initializer = tf.random_uniform_initializer(-sqrt3,
                                                        sqrt3,
                                                        dtype=self.dtype)

            self.decoder_embeddings = tf.get_variable(
                name='embedding',
                shape=[self.num_decoder_symbols, self.embedding_size],
                initializer=initializer,
                dtype=self.dtype)

            # Input projection layer to feed embedded inputs to the cell
            # ** Essential when use_residual=True to match input/output dims
            input_layer = Dense(self.hidden_units,
                                dtype=self.dtype,
                                name='input_projection')

            # Output projection layer to convert cell_outputs to logits
            output_layer = Dense(self.num_decoder_symbols,
                                 name='output_projection')

            if self.mode == 'train':
                # decoder_inputs_embedded: [batch_size, max_time_step + 1, embedding_size]
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train)

                # Embedded inputs having gone through input projection layer
                self.decoder_inputs_embedded = input_layer(
                    self.decoder_inputs_embedded)

                # Helper to feed inputs for training: read inputs from dense ground truth vectors
                training_helper = seq2seq.TrainingHelper(
                    inputs=self.decoder_inputs_embedded,
                    sequence_length=self.decoder_inputs_length_train,
                    time_major=False,
                    name='training_helper')

                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state,
                    output_layer=output_layer)
                #output_layer=None)

                # Maximum decoder time_steps in current batch
                max_decoder_length = tf.reduce_max(
                    self.decoder_inputs_length_train)

                # decoder_outputs_train: BasicDecoderOutput
                #                        namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_train.rnn_output: [batch_size, max_time_step + 1, num_decoder_symbols] if output_time_major=False
                #                                   [max_time_step + 1, batch_size, num_decoder_symbols] if output_time_major=True
                # decoder_outputs_train.sample_id: [batch_size], tf.int32
                (self.decoder_outputs_train, self.decoder_last_state_train,
                 self.decoder_outputs_length_train) = (seq2seq.dynamic_decode(
                     decoder=training_decoder,
                     output_time_major=False,
                     impute_finished=True,
                     maximum_iterations=max_decoder_length))

                # More efficient to do the projection on the batch-time-concatenated tensor
                # logits_train: [batch_size, max_time_step + 1, num_decoder_symbols]
                # self.decoder_logits_train = output_layer(self.decoder_outputs_train.rnn_output)
                self.decoder_logits_train = tf.identity(
                    self.decoder_outputs_train.rnn_output)
                # Use argmax to extract decoder symbols to emit
                self.decoder_pred_train = tf.argmax(self.decoder_logits_train,
                                                    axis=-1,
                                                    name='decoder_pred_train')

                # masks: masking for valid and padded time steps, [batch_size, max_time_step + 1]
                masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length_train,
                    maxlen=max_decoder_length,
                    dtype=self.dtype,
                    name='masks')

                def class_weighted_loss(labels, logits):
                    class_weights = tf.constant([
                        0.00017234778799135608, 0.00017234778799135608,
                        0.00017234778799135608, 1.6821366229319637e-05,
                        4.898869308918329e-05, 7.106575604186823e-05,
                        7.126891354944498e-05, 7.514392550863835e-05,
                        7.719102618435312e-05, 8.89973910758995e-05,
                        0.00010430076292140834, 0.00010567508046918493,
                        0.00011254233356378444, 0.00013745981039146453,
                        0.00015365550520395147, 0.00016343173716428013,
                        0.00016623641703291143, 0.00018462654135821253,
                        0.0001873476479039208, 0.00018800477750021655,
                        0.00020981274294876723, 0.00021602805964389768,
                        0.00024354484846033354, 0.00024936107032012903,
                        0.0002495739348066665, 0.000319111899575184,
                        0.00033594586064125193, 0.0003818581956683335,
                        0.0003838636576651593, 0.0005417806138677063,
                        0.0006711205600832021, 0.0006750650134170244,
                        0.0006953534538202605, 0.0007032603813511271,
                        0.0007207552048226591, 0.0007264535179396215,
                        0.0007633538390502503, 0.000891602363160162,
                        0.0009813883808113227, 0.0010641991144668115,
                        0.0011028839931134101, 0.0012656472742694626,
                        0.0013067898106130453, 0.0013988733031399323,
                        0.0016671901108961662, 0.0017748398034871436,
                        0.0022286969673726295, 0.0022647955802244397,
                        0.0022727983914619817, 0.002481488984505173,
                        0.002566647824356508, 0.0026578592759658715,
                        0.002682243306020604, 0.002818588715090889,
                        0.002964064261676225, 0.0029888566207422903,
                        0.0030339714376591553, 0.0032127969269917125,
                        0.0032616731479905726, 0.0033361096721148385,
                        0.00424275689171333, 0.004594299605598149,
                        0.004750383639466329, 0.005306946739139776,
                        0.005497452519519153, 0.005911782580732912,
                        0.007162605175765489, 0.007194652626216341,
                        0.007496526162980663, 0.007960420108709664,
                        0.007960420108709664, 0.008691918172753256,
                        0.009110509132914177, 0.011323977901122198,
                        0.011652209144632988, 0.012711500885054168,
                        0.013180367720978298, 0.015169857188295775,
                        0.016242473353124773, 0.022971498027990745,
                        0.024000072566557496, 0.024549692548997745,
                        0.029504676366226647, 0.035733441376874495,
                        0.03828583004665124, 0.03874710510745427,
                        0.058472904071249165, 0.0630590141944844,
                        0.08040024309796762, 0.3573344137687449
                    ])
                    weights = tf.gather(class_weights, labels)
                    unweighted_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=labels, logits=logits)
                    return unweighted_losses * weights

                # Computes per word average cross-entropy over a batch
                # Internally calls 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default
                if self.loss_type == 'weighted':
                    print('using weighted loss!')
                    self.loss = seq2seq.sequence_loss(
                        logits=self.decoder_logits_train,
                        targets=self.decoder_targets_train,
                        weights=masks,
                        average_across_timesteps=True,
                        average_across_batch=True,
                        softmax_loss_function=class_weighted_loss,
                    )
                else:
                    self.loss = seq2seq.sequence_loss(
                        logits=self.decoder_logits_train,
                        targets=self.decoder_targets_train,
                        weights=masks,
                        average_across_timesteps=True,
                        average_across_batch=True,
                    )
                # Training summary for the current batch_loss
                tf.summary.scalar('loss', self.loss)

                # Construct graphs for minimizing loss
                self.init_optimizer()

            elif self.mode == 'decode':

                # Start_tokens: [batch_size,] `int32` vector
                start_tokens = tf.ones([
                    self.batch_size,
                ], tf.int32) * data_utils.start_token
                end_token = data_utils.end_token

                def embed_and_input_proj(inputs):
                    return input_layer(
                        tf.nn.embedding_lookup(self.decoder_embeddings,
                                               inputs))

                if not self.use_beamsearch_decode:
                    # Helper to feed inputs for greedy decoding: uses the argmax of the output
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,
                        end_token=end_token,
                        embedding=embed_and_input_proj)
                    # Basic decoder performs greedy decoding at each time step
                    print("building greedy decoder..")
                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=output_layer)
                else:
                    # Beamsearch is used to approximately find the most likely translation
                    print("building beamsearch decoder..")
                    inference_decoder = beam_search_decoder.BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=output_layer,
                    )
                # For GreedyDecoder, return
                # decoder_outputs_decode: BasicDecoderOutput instance
                #                         namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_decode.rnn_output: [batch_size, max_time_step, num_decoder_symbols] 	if output_time_major=False
                #                                    [max_time_step, batch_size, num_decoder_symbols] 	if output_time_major=True
                # decoder_outputs_decode.sample_id: [batch_size, max_time_step], tf.int32		if output_time_major=False
                #                                   [max_time_step, batch_size], tf.int32               if output_time_major=True

                # For BeamSearchDecoder, return
                # decoder_outputs_decode: FinalBeamSearchDecoderOutput instance
                #                         namedtuple(predicted_ids, beam_search_decoder_output)
                # decoder_outputs_decode.predicted_ids: [batch_size, max_time_step, beam_width] if output_time_major=False
                #                                       [max_time_step, batch_size, beam_width] if output_time_major=True
                # decoder_outputs_decode.beam_search_decoder_output: BeamSearchDecoderOutput instance
                #                                                    namedtuple(scores, predicted_ids, parent_ids)

                (self.decoder_outputs_decode, self.decoder_last_state_decode,
                 self.decoder_outputs_length_decode) = (
                     seq2seq.dynamic_decode(
                         decoder=inference_decoder,
                         output_time_major=False,
                         #impute_finished=True,	# error occurs
                         maximum_iterations=self.max_decode_step))

                if not self.use_beamsearch_decode:
                    # decoder_outputs_decode.sample_id: [batch_size, max_time_step]
                    # Or use argmax to find decoder symbols to emit:
                    # self.decoder_pred_decode = tf.argmax(self.decoder_outputs_decode.rnn_output,
                    #                                      axis=-1, name='decoder_pred_decode')

                    # Here, we use expand_dims to be compatible with the result of the beamsearch decoder
                    # decoder_pred_decode: [batch_size, max_time_step, 1] (output_major=False)
                    self.decoder_pred_decode = tf.expand_dims(
                        self.decoder_outputs_decode.sample_id, -1)

                else:
                    # Use beam search to approximately find the most likely translation
                    # decoder_pred_decode: [batch_size, max_time_step, beam_width] (output_major=False)
                    self.decoder_pred_decode = self.decoder_outputs_decode.predicted_ids
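# The hard-coded `class_weights` table above looks like normalized inverse
# label frequencies. One plausible way such a table could be derived (an
# assumption, not from the source):
import numpy as np

def inverse_frequency_weights(label_counts):
    inv = 1.0 / np.asarray(label_counts, dtype=np.float64)
    return inv / inv.sum()  # sums to 1.0; rare classes get larger weights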
Example #19
    def build_decoder(self):
        print("building decoder and attention..")
        with tf.variable_scope('decoder'):
            # Building decoder_cell and decoder_initial_state
            self.decoder_cell, self.decoder_initial_state = self.build_decoder_cell(
            )

            # Initialize decoder embeddings to have variance=1.
            sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
            initializer = tf.random_uniform_initializer(-sqrt3,
                                                        sqrt3,
                                                        dtype=self.dtype)

            self.decoder_embeddings = tf.get_variable(
                name='embedding',
                shape=[self.num_decoder_symbols, self.embedding_size],
                initializer=initializer,
                dtype=self.dtype)

            # Input projection layer to feed embedded inputs to the cell
            # ** Essential when use_residual=True to match input/output dims
            input_layer = Dense(self.hidden_units,
                                dtype=self.dtype,
                                name='input_projection')

            # Output projection layer to convert cell_outputs to logits
            output_layer = Dense(self.num_decoder_symbols,
                                 name='output_projection')

            if self.mode == 'train':
                # decoder_inputs_embedded: [batch_size, max_time_step + 1, embedding_size]
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train)

                # Embedded inputs having gone through input projection layer
                self.decoder_inputs_embedded = input_layer(
                    self.decoder_inputs_embedded)

                # Helper to feed inputs for training: read inputs from dense ground truth vectors
                training_helper = seq2seq.TrainingHelper(
                    inputs=self.decoder_inputs_embedded,
                    sequence_length=self.decoder_inputs_length_train,
                    time_major=False,
                    name='training_helper')

                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state,
                    output_layer=output_layer)
                #output_layer=None)

                # Maximum decoder time_steps in current batch
                max_decoder_length = tf.reduce_max(
                    self.decoder_inputs_length_train)

                # decoder_outputs_train: BasicDecoderOutput
                #                        namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_train.rnn_output: [batch_size, max_time_step + 1, num_decoder_symbols] if output_time_major=False
                #                                   [max_time_step + 1, batch_size, num_decoder_symbols] if output_time_major=True
                # decoder_outputs_train.sample_id: [batch_size], tf.int32
                (self.decoder_outputs_train, self.decoder_last_state_train,
                 self.decoder_outputs_length_train) = (seq2seq.dynamic_decode(
                     decoder=training_decoder,
                     output_time_major=False,
                     impute_finished=True,
                     maximum_iterations=max_decoder_length))

                # More efficient to do the projection on the batch-time-concatenated tensor
                # logits_train: [batch_size, max_time_step + 1, num_decoder_symbols]
                # self.decoder_logits_train = output_layer(self.decoder_outputs_train.rnn_output)
                self.decoder_logits_train = tf.identity(
                    self.decoder_outputs_train.rnn_output)
                # Use argmax to extract decoder symbols to emit
                self.decoder_pred_train = tf.argmax(self.decoder_logits_train,
                                                    axis=-1,
                                                    name='decoder_pred_train')

                # masks: masking for valid and padded time steps, [batch_size, max_time_step + 1]
                masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length_train,
                    maxlen=max_decoder_length,
                    dtype=self.dtype,
                    name='masks')

                # Computes per word average cross-entropy over a batch
                # Internally calls 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default
                self.loss = seq2seq.sequence_loss(
                    logits=self.decoder_logits_train,
                    targets=self.decoder_targets_train,
                    weights=masks,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )
                # Training summary for the current batch_loss
                tf.summary.scalar('loss', self.loss)

                # Construct graphs for minimizing loss
                self.init_optimizer()

            elif self.mode == 'decode':

                # Start_tokens: [batch_size,] `int32` vector
                start_tokens = tf.ones([
                    self.batch_size,
                ], tf.int32) * data_utils.start_token
                end_token = data_utils.end_token

                def embed_and_input_proj(inputs):
                    return input_layer(
                        tf.nn.embedding_lookup(self.decoder_embeddings,
                                               inputs))

                if not self.use_beamsearch_decode:
                    # Helper to feed inputs for greedy decoding: uses the argmax of the output
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,
                        end_token=end_token,
                        embedding=embed_and_input_proj)
                    # Basic decoder performs greedy decoding at each time step
                    print("building greedy decoder..")
                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=output_layer)
                else:
                    # Beamsearch is used to approximately find the most likely translation
                    print("building beamsearch decoder..")
                    inference_decoder = beam_search_decoder.BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=output_layer,
                    )
                # For GreedyDecoder, return
                # decoder_outputs_decode: BasicDecoderOutput instance
                #                         namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_decode.rnn_output: [batch_size, max_time_step, num_decoder_symbols]   if output_time_major=False
                #                                    [max_time_step, batch_size, num_decoder_symbols]   if output_time_major=True
                # decoder_outputs_decode.sample_id: [batch_size, max_time_step], tf.int32               if output_time_major=False
                #                                   [max_time_step, batch_size], tf.int32               if output_time_major=True

                # For BeamSearchDecoder, return
                # decoder_outputs_decode: FinalBeamSearchDecoderOutput instance
                #                         namedtuple(predicted_ids, beam_search_decoder_output)
                # decoder_outputs_decode.predicted_ids: [batch_size, max_time_step, beam_width] if output_time_major=False
                #                                       [max_time_step, batch_size, beam_width] if output_time_major=True
                # decoder_outputs_decode.beam_search_decoder_output: BeamSearchDecoderOutput instance
                #                                                    namedtuple(scores, predicted_ids, parent_ids)

                (self.decoder_outputs_decode, self.decoder_last_state_decode,
                 self.decoder_outputs_length_decode) = (
                     seq2seq.dynamic_decode(
                         decoder=inference_decoder,
                         output_time_major=False,
                         #impute_finished=True,      # error occurs
                         maximum_iterations=self.max_decode_step))

                if not self.use_beamsearch_decode:
                    # decoder_outputs_decode.sample_id: [batch_size, max_time_step]
                    # Or use argmax to find decoder symbols to emit:
                    # self.decoder_pred_decode = tf.argmax(self.decoder_outputs_decode.rnn_output,
                    #                                      axis=-1, name='decoder_pred_decode')

                    # Here, we use expand_dims to be compatible with the result of the beamsearch decoder
                    # decoder_pred_decode: [batch_size, max_time_step, 1] (output_major=False)
                    self.decoder_pred_decode = tf.expand_dims(
                        self.decoder_outputs_decode.sample_id, -1)

                else:
                    # Use beam search to approximately find the most likely translation
                    # decoder_pred_decode: [batch_size, max_time_step, beam_width] (output_major=False)
                    self.decoder_pred_decode = self.decoder_outputs_decode.predicted_ids
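# Both this example and Example #17 finish their training branch by calling
# `self.init_optimizer()`, which is not shown. A typical implementation for
# this kind of model (a sketch under assumed attribute names such as
# `max_gradient_norm`, not the source):
def init_optimizer(self):
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    # clip by global norm to stabilize RNN training
    clipped, _ = tf.clip_by_global_norm(gradients, self.max_gradient_norm)
    self.updates = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(
        zip(clipped, trainable_params), global_step=self.global_step)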
Example #20
    def _inference(self):

        self.embedding = tf.get_variable("embedding",
                                         [self.VOL_SIZE, self.EMBEDDING_SIZE],
                                         dtype=tf.float32)
        num_classes = self.VOL_SIZE
        # use softmax to map decoder_output to number(0-5,EOS)
        self.softmax_w = self.variable(name="softmax_w",
                                       shape=[self.HIDDEN_UNIT, num_classes])
        self.softmax_b = self.variable(name="softmax_b", shape=[num_classes])

        # prepare to compute c_i = \sum a_{ij}h_j, encoder_states are h_js
        hidden_states = []
        self.W_a = self.variable(name="attention_w_a",
                                 shape=[self.HIDDEN_UNIT, self.HIDDEN_UNIT])
        self.U_a = self.variable(name="attention_u_a",
                                 shape=[self.HIDDEN_UNIT, self.HIDDEN_UNIT])
        self.v_a = self.variable(name="attention_v_a",
                                 shape=[1, self.EMBEDDING_SIZE])

        # connect intention with decoder
        # connect intention with intention
        self.I_E = self.variable(name="intention_e",
                                 shape=[self.HIDDEN_UNIT, self.HIDDEN_UNIT])
        self.encoder_to_intention_b = self.variable(name="encoder_intention_b",
                                                    shape=[self.HIDDEN_UNIT])
        self.I_I = self.variable(name="intention_i",
                                 shape=[self.HIDDEN_UNIT, self.HIDDEN_UNIT])
        self.intention_to_decoder_b = self.variable(name="intention_decoder_b",
                                                    shape=[self.HIDDEN_UNIT])
        # self.C = self.variable(name="attention_C", shape=[self.HIDDEN_UNIT, self.HIDDEN_UNIT])

        # encoder_params = rnn_encoder.StackBidirectionalRNNEncoder.default_params()
        # encoder_params["rnn_cell"]["cell_params"][
        #     "num_units"] = self.HIDDEN_UNIT
        # encoder_params["rnn_cell"]["cell_class"] = "BasicLSTMCell"
        # encoder_params["rnn_cell"]["num_layers"] = self.N_LAYER

        with tf.variable_scope("encoder") as scope:
            encoder_embedding_vectors = tf.nn.embedding_lookup(
                self.embedding, self.encoder_inputs)
            encoder_fw_cell = self.stacked_rnn(self.HIDDEN_UNIT)
            encoder_bw_cell = self.stacked_rnn(self.HIDDEN_UNIT)
            self.encoder_initial_fw_state = self.get_state_variables(
                self.batch_size, encoder_fw_cell)
            self.encoder_initial_bw_state = self.get_state_variables(
                self.batch_size, encoder_bw_cell)
            ((outputs_fw, outputs_bw), (state_fw, state_bw)) = \
                tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_fw_cell, cell_bw=encoder_bw_cell,
                                                inputs=encoder_embedding_vectors,
                                                sequence_length=self.encoder_inputs_length,
                                                initial_state_fw=self.encoder_initial_fw_state,
                                                initial_state_bw=self.encoder_initial_bw_state,
                                                dtype=tf.float32)
        encoder_final_state_c = tf.concat(
            (state_fw[self.N_LAYER - 1][0], state_bw[self.N_LAYER - 1][0]), 1)

        encoder_final_state_h = tf.concat(
            (state_fw[self.N_LAYER - 1][1], state_bw[self.N_LAYER - 1][1]), 1)

        encoder_final_state = tf.nn.rnn_cell.LSTMStateTuple(
            c=encoder_final_state_c, h=encoder_final_state_h)

        hidden_state = tf.reshape(encoder_final_state[1],
                                  shape=(-1, self.HIDDEN_UNIT * 2))

        # compute U_a*h_j quote:"this vector can be pre-computed.. U_a is R^n * n, h_j is R^n"
        # U_ah = []
        # for h in hidden_states:
        #     ## h.shape is BATCH, HIDDEN_UNIT
        #     u_ahj = tf.matmul(h, self.U_a)
        #     U_ah.append(u_ahj)

        # hidden_states = tf.stack(hidden_states)
        self.decoder_outputs = []
        # self.internal = []
        #
        with tf.variable_scope("decoder") as scope:
            self.decoder_cell = self.stacked_rnn(self.HIDDEN_UNIT)
            self.decoder_state = self.get_state_variables(
                self.batch_size, self.decoder_cell)
        #
        # building intention network
        with tf.variable_scope("intention") as scope:
            self.intention_cell = self.stacked_rnn(self.HIDDEN_UNIT)
            self.intention_state = self.get_state_variables(
                self.batch_size, self.intention_cell)
            if self.turn_index > 0:
                tf.get_variable_scope().reuse_variables()
            # for encoder_step_hidden_state in hidden_states:
            intention_output, intention_state = self.intention_cell(
                hidden_state, self.intention_state)

        # # #
        #     cT_encoder= self._concat_hidden(encoder_state)
        initial_decoder_state = []
        for i in range(len(intention_state)):
            b = intention_state[i]
            c = b[0]
            h = b[1]

            Dh = tf.tanh(tf.matmul(h, self.I_I))
            initial_decoder_state.append(tf.contrib.rnn.LSTMStateTuple(c, Dh))
        # print(len(initial_decoder_state))
        initial_decoder_state = tuple(initial_decoder_state)
        print(initial_decoder_state)
        # #     intention_states.append(intention_hidden_state)
        #     intention_state = self.intention_state
        #     for encoder_step_hidden_state in hidden_states:
        #         intention_output, intention_state = self.intention_cell(encoder_step_hidden_state, intention_state)
        # # intention_state = self.intention_state

        # self.modified = []
        # for layer in xrange(len(encoder_state)):
        #     layer_intention_state = encoder_state[layer]
        #     layer_last_encoder_state = self.encoder_state[layer]
        #     h = layer_intention_state[1]
        #     c = layer_intention_state[0]
        #     eh = layer_last_encoder_state[1]
        #     ec = layer_last_encoder_state[0]
        #     self.kernel_i = tf.add(tf.matmul(h, self.I_I), self.intention_to_decoder_b)
        #     self.kernel_e = tf.add(tf.matmul(eh, self.I_E), self.encoder_to_intention_b)
        #     self.h_ = tf.concat([self.kernel_e, self.kernel_i], axis=1)
        #     cc = tf.concat([c, ec], axis=1)
        #     layer = tf.contrib.rnn.LSTMStateTuple(cc, self.h_)
        #     self.modified.append(layer)

        #

        # *****************************************mark************************************************************
        # with tf.variable_scope("decoder") as scope:
        #     if self.TRAINABLE:
        #         decoder_embedding_vectors = tf.nn.embedding_lookup(
        #             self.embedding, self.decoder_inputs)
        #         self.decoder_outputs, decoder_state = tf.nn.dynamic_rnn(cell=self.decoder_cell,
        #                                                                 inputs=decoder_embedding_vectors,
        #                                                                 sequence_length=self.decoder_inputs_length,
        #                                                                 initial_state=initial_decoder_state,
        #                                                                 dtype=tf.float32
        #                                                                 )
        #         self.intention_state_update_op = self.get_state_update_op(
        #             self.intention_state, intention_state)
        #         self.encoder_state_update_op = self.get_state_update_op(
        #             self.encoder_initial_fw_state, decoder_state)

        # *****************************************mark end********************

        # ***************try another way to decode*********************

        with tf.variable_scope("decoder") as scope:
            if self.TRAINABLE:
                decoder_embedding_vectors = tf.nn.embedding_lookup(
                    self.embedding, self.decoder_inputs)
                output_layer = Dense(
                    self.VOL_SIZE,
                    kernel_initializer=tf.truncated_normal_initializer(
                        mean=0.0, stddev=0.1))
                self.max_target_sequence_length = tf.reduce_max(
                    self.decoder_inputs_length, name='max_target_len')

                training_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=decoder_embedding_vectors,
                    sequence_length=self.decoder_inputs_length,
                    time_major=False)

                training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=initial_decoder_state,
                    output_layer=output_layer)

                self.decoder_output, decoder_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    training_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_sequence_length)
                self.intention_state_update_op = self.get_state_update_op(
                    self.intention_state, intention_state)
                self.encoder_state_update_op = self.get_state_update_op(
                    self.encoder_initial_fw_state, decoder_state)

            else:
                # https://github.com/tensorflow/tensorflow/issues/11598
                # PREDICTING_DECODER  ## METHOD 1
                output_layer = Dense(
                    self.VOL_SIZE,
                    kernel_initializer=tf.truncated_normal_initializer(
                        mean=0.0, stddev=0.1))
                greedy_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=self.embedding,
                    start_tokens=tf.tile(
                        tf.constant([self.data_config.GO_], dtype=tf.int32),
                        [self.batch_size]),
                    end_token=self.data_config.EOS_)
                infer_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=greedy_helper,
                    initial_state=initial_decoder_state,
                    output_layer=output_layer)
                self.decoder_output, decoder_state, final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
                    infer_decoder,
                    impute_finished=True,
                    maximum_iterations=100)
                logits = tf.identity(self.decoder_output.rnn_output, 'logits')
                # self.predictions_ = tf.argmax(logits, axis=2)
                time_major = tf.transpose(logits, [1, 0, 2])
                print(time_major)
                (self.predictions_,
                 self.log_probabilities) = tf.nn.ctc_beam_search_decoder(
                     inputs=time_major,
                     sequence_length=final_sequence_lengths,
                     beam_width=self.beam_width,
                     top_paths=self.paths,
                     merge_repeated=True)
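# `get_state_variables` / `get_state_update_op` are used above to persist RNN
# state across session runs; the usual pattern looks like this (a sketch, not
# the source implementation):
def get_state_variables(self, batch_size, cell):
    state_variables = []
    for state_c, state_h in cell.zero_state(batch_size, tf.float32):
        state_variables.append(tf.contrib.rnn.LSTMStateTuple(
            tf.Variable(state_c, trainable=False),
            tf.Variable(state_h, trainable=False)))
    return tuple(state_variables)

def get_state_update_op(self, state_variables, new_states):
    update_ops = []
    for state_variable, new_state in zip(state_variables, new_states):
        update_ops.extend([state_variable[0].assign(new_state[0]),
                           state_variable[1].assign(new_state[1])])
    return tf.tuple(update_ops)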
Example #21
    def build_decoder(self):
        with tf.variable_scope("decoder"):
            decoder_cell, decoder_initial_state = self.build_decoder_cell()

            # start tokens : [batch_size], which is fed to BeamsearchDecoder during inference
            start_tokens = tf.ones([self.batch_size],
                                   dtype=tf.int32) * data_util.ID_GO
            end_token = data_util.ID_EOS
            input_layer = Dense(self.state_size * 2, dtype=tf.float32,
                                name="input_layer")
            output_layer = Dense(self.decoder_vocab_size,
                                 name="output_projection")
            if self.mode == "train":
                # feed ground truth decoder input token every time step
                decoder_input_lookup = tf.nn.embedding_lookup(
                    self.embedding_matrix, self.decoder_input)
                decoder_input_lookup = input_layer(decoder_input_lookup)
                training_helper = seq2seq.TrainingHelper(
                    inputs=decoder_input_lookup,
                    sequence_length=self.decoder_train_len,
                    name="training_helper")
                training_decoder = seq2seq.BasicDecoder(cell=decoder_cell,
                                                        initial_state=decoder_initial_state,
                                                        helper=training_helper,
                                                        output_layer=output_layer)

                # decoder_outputs_train: BasicDecoderOutput
                #                        namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_train.rnn_output: [batch_size, max_time_step + 1, num_decoder_symbols] if output_time_major=False
                #                                   [max_time_step + 1, batch_size, num_decoder_symbols] if output_time_major=True
                # decoder_outputs_train.sample_id: [batch_size], tf.int32
                max_decoder_len = tf.reduce_max(self.decoder_train_len)
                decoder_outputs_train, final_state, _ = seq2seq.dynamic_decode(
                    training_decoder, impute_finished=True, swap_memory=True,
                    maximum_iterations=max_decoder_len)
                self.decoder_logits_train = tf.identity(
                    decoder_outputs_train.rnn_output)
                decoder_pred = tf.argmax(self.decoder_logits_train, axis=2)
                # sequence mask for get valid sequence except zero padding
                weights = tf.sequence_mask(self.decoder_len,
                                           maxlen=max_decoder_len,
                                           dtype=tf.float32)
                # compute cross entropy loss for all sequence prediction and ignore loss from zero padding
                self.loss = seq2seq.sequence_loss(
                    logits=self.decoder_logits_train,
                    targets=self.decoder_target,
                    weights=weights, average_across_batch=True,
                    average_across_timesteps=True)
                tf.summary.scalar("loss", self.loss)

                with tf.variable_scope("train_optimizer") and tf.device(
                        "/device:GPU:1"):
                    # use AdamOptimizer and clip gradient by max_norm 5.0
                    # use global step for counting every iteration
                    params = tf.trainable_variables()
                    gradients = tf.gradients(self.loss, params)
                    clipped_gradients, _ = tf.clip_by_global_norm(gradients,
                                                                  5.0)
                    learning_rate = tf.train.exponential_decay(self.lr,
                                                               self.global_step,
                                                               10000, 0.96)
                    opt = tf.train.AdagradOptimizer(learning_rate)

                    self.train_op = opt.apply_gradients(
                        zip(clipped_gradients, params),
                        global_step=self.global_step)

            elif self.mode == "test":
                def embedding_proj(inputs):
                    return input_layer(
                        tf.nn.embedding_lookup(self.embedding_matrix,
                                               inputs))

                inference_decoder = seq2seq.BeamSearchDecoder(cell=decoder_cell,
                                                              embedding=embedding_proj,
                                                              start_tokens=start_tokens,
                                                              end_token=end_token,
                                                              initial_state=decoder_initial_state,
                                                              beam_width=self.beam_depth,
                                                              output_layer=output_layer)

                # For GreedyDecoder, return
                # decoder_outputs_decode: BasicDecoderOutput instance
                #                         namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_decode.rnn_output: [batch_size, max_time_step, num_decoder_symbols] 	if output_time_major=False
                #                                    [max_time_step, batch_size, num_decoder_symbols] 	if output_time_major=True
                # decoder_outputs_decode.sample_id: [batch_size, max_time_step], tf.int32		if output_time_major=False
                #                                   [max_time_step, batch_size], tf.int32               if output_time_major=True

                # For BeamSearchDecoder, return
                # decoder_outputs_decode: FinalBeamSearchDecoderOutput instance
                #                         namedtuple(predicted_ids, beam_search_decoder_output)
                # decoder_outputs_decode.predicted_ids: [batch_size, max_time_step, beam_width] if output_time_major=False
                #                                       [max_time_step, batch_size, beam_width] if output_time_major=True
                # decoder_outputs_decode.beam_search_decoder_output: BeamSearchDecoderOutput instance
                #                                                    namedtuple(scores, predicted_ids, parent_ids)
                with tf.device("/device:GPU:1"):
                    decoder_outputs, decoder_last_state, decoder_output_length = \
                        seq2seq.dynamic_decode(decoder=inference_decoder,
                                               output_time_major=False,
                                               swap_memory=True,
                                               maximum_iterations=self.max_iter)
                    self.decoder_pred_test = decoder_outputs.predicted_ids
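# `build_decoder_cell` is called above but not shown. Given the
# `state_size * 2` projections (suggesting a bidirectional encoder), a
# plausible sketch (assumed names `encoder_outputs` and `encoder_len`, not the
# source) wraps the decoder cell with attention:
def build_decoder_cell(self):
    attn_mech = seq2seq.BahdanauAttention(
        num_units=self.state_size * 2,
        memory=self.encoder_outputs,
        memory_sequence_length=self.encoder_len)
    cell = seq2seq.AttentionWrapper(
        cell=tf.nn.rnn_cell.LSTMCell(self.state_size * 2),
        attention_mechanism=attn_mech,
        attention_layer_size=self.state_size * 2)
    # for the beam-search branch, memory and initial state would additionally
    # need seq2seq.tile_batch with beam_width copies
    initial_state = cell.zero_state(self.batch_size, tf.float32)
    return cell, initial_state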
Example #22
    def __init__(self, params):
        # Input variables
        batch_size = params.batch_size
        gen_length = params.gen_length
        if getattr(params, 'infer', 0) == 1:  # inference flag (assumed to live on params)
            batch_size = 1
            gen_length = 1
        self.dropout_keep = tf.placeholder_with_default(tf.constant(1.0),
                                                        shape=None)
        self.lr = tf.placeholder_with_default(tf.constant(0.01), shape=None)
        self.x_word = tf.placeholder(tf.int32,
                                     shape=(None, params.turn_num *
                                            params.utc_length),
                                     name='x_word')
        self.x_api = tf.placeholder(tf.float32, shape=(None, 3), name='x_api')
        self.y_word_in = tf.placeholder(tf.int32,
                                        shape=(None, gen_length),
                                        name='y_word_in')
        self.y_word_out = tf.placeholder(tf.int32,
                                         shape=(None, gen_length),
                                         name='y_word_out')
        self.y_len = tf.placeholder(tf.int32, shape=(None, ))
        # Word embedding
        x_embedding = tf.get_variable(
            name='x_embedding', shape=[params.vocab_size, params.embed_size])
        x_word_embedded = tf.nn.embedding_lookup(x_embedding, self.x_word)
        y_embedding = tf.get_variable(
            name='y_embedding', shape=[params.vocab_size, params.embed_size])
        y_word_embedded = tf.nn.embedding_lookup(y_embedding, self.y_word_in)
        # Extend x_api along the time axis so it can be concatenated with
        # y_word_embedded
        x_api = tf.expand_dims(self.x_api, 1)              # [batch, 1, 3]
        x_api_extend = tf.tile(x_api, [1, gen_length, 1])  # [batch, gen_length, 3]
        # y_word_embedded = tf.concat([y_word_embedded, x_api_extend], 2)

        def single_cell(state_size):  # define the cell of LSTM
            return tf.contrib.rnn.BasicLSTMCell(state_size)

        # Encoder
        with tf.variable_scope('encoder'):
            self.encoder_multi_cell = tf.contrib.rnn.MultiRNNCell([
                single_cell(params.state_size) for _ in range(params.layer_num)
            ])  # multi-layer
            # NOTE: only the first utc_length of the turn_num * utc_length
            # input steps are read here.
            self.encoder_outputs, self.encoder_state = tf.nn.dynamic_rnn(
                self.encoder_multi_cell,
                x_word_embedded,
                sequence_length=[params.utc_length] * batch_size,
                dtype=tf.float32,
                scope='encoder')
        with tf.variable_scope('decoder'):
            self.decoder_multi_cell = tf.contrib.rnn.MultiRNNCell([
                single_cell(params.state_size) for _ in range(params.layer_num)
            ])  # multi-layer

            attn_mech = tf.contrib.seq2seq.BahdanauAttention(
                num_units=params.state_size,
                memory=self.encoder_outputs,
                name='attention_mechanism')
            attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                self.decoder_multi_cell,
                attention_mechanism=attn_mech,
                attention_layer_size=128,
                name="attention_wrapper")
            # Start from the wrapper's zero state, seeded with the encoder state
            init_state = attn_cell.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=self.encoder_state)
            train_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=y_word_embedded,
                sequence_length=self.y_len,
                time_major=False)
            projection_layer = Dense(params.vocab_size)
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=attn_cell,  # attn_cell,
                helper=train_helper,  # A Helper instance
                initial_state=init_state,  # initial state of decoder
                output_layer=projection_layer
            )  # instance of tf.layers.Layer, like Dense

            # Perform dynamic decoding with decoder
            self.decoder_outputs, self.decoder_state, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder)
        self.w = tf.get_variable(
            "softmax_w",
            [params.vocab_size, params.vocab_size])  # output weights
        self.b = tf.get_variable("softmax_b", [params.vocab_size])
        # rnn_output is already projected to vocab_size by projection_layer;
        # softmax_w / softmax_b apply a second (vocab -> vocab) projection on top.
        outputs = self.decoder_outputs.rnn_output
        # Loss
        output = tf.reshape(outputs, [-1, params.vocab_size])
        self.logits = tf.matmul(output, self.w) + self.b
        self.probs = tf.nn.softmax(self.logits)
        targets = tf.reshape(self.y_word_out, [-1])
        weights = tf.ones_like(targets, dtype=tf.float32)

        loss = tf.contrib.legacy_seq2seq.sequence_loss([self.logits],
                                                       [targets], [weights])
        self.cost = tf.reduce_sum(loss) / batch_size
        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(grads, params.grad_clip)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
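
A hedged training-step sketch for the model above (hypothetical driver code, not part of the original snippet; the feed keys are the placeholders defined in __init__, and the batch arrays are assumed to be preprocessed to matching shapes):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        model.x_word: batch_x_word,      # [batch, turn_num * utc_length]
        model.x_api: batch_x_api,        # [batch, 3]
        model.y_word_in: batch_y_in,     # [batch, gen_length]
        model.y_word_out: batch_y_out,   # [batch, gen_length]
        model.y_len: batch_y_len,        # [batch]
        model.dropout_keep: 0.8,
    }
    _, cost = sess.run([model.train_op, model.cost], feed_dict=feed)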
Beispiel #23
0
    def initialize_model(self):
        # Source and target share a single vocabulary (src_dictionary)
        INPUT_NUM_VOCAB = len(self.src_dictionary)
        OUTPUT_NUM_VOCAB = len(self.src_dictionary)

        tf.reset_default_graph()

        self.encoder_input_seq = tf.placeholder(tf.int32, [None, None], name='encoder_input_seq')

        self.encoder_seq_len = tf.placeholder(tf.int32, (None,), name='encoder_seq_len')

        # Decoder placeholders
        self.decoder_output_seq = tf.placeholder(tf.int32, [None, None], name='decoder_output_seq')

        self.decoder_seq_len = tf.placeholder(tf.int32, (None,), name='decoder_seq_len')

        max_decoder_seq_len = tf.reduce_max(self.decoder_seq_len, name='max_decoder_seq_len')

        encoder_input_embedded = tf.nn.embedding_lookup(self.embedding_matrix, self.encoder_input_seq)

        encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.RNN_STATE_DIM)

        self.encoder_output, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, encoder_input_embedded,
            sequence_length=self.encoder_seq_len, dtype=tf.float64)

        decoder_raw_seq = self.decoder_output_seq[:, :-1]

        go_prefixes = tf.fill([self.BATCH_SIZE, 1], self.src_dictionary[('<s>', 'None', 'None')])

        decoder_input_seq = tf.concat([go_prefixes, decoder_raw_seq], 1)
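        # decoder_input_seq is the right-shifted target: last token dropped,
        # <s> prepended, so the decoder sees <s>, y_1, ..., y_{n-1} as input.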

        decoder_input_embedded = tf.nn.embedding_lookup(self.embedding_matrix,
                                                        decoder_input_seq)

        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.RNN_STATE_DIM)

        output_layer_kernel_initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
        output_layer = Dense(
            OUTPUT_NUM_VOCAB,
            kernel_initializer=output_layer_kernel_initializer
        )

        with tf.variable_scope("decode"):
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=decoder_input_embedded,
                sequence_length=[self.max_length] * self.BATCH_SIZE,
                time_major=False
            )

            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                decoder_cell,
                training_helper,
                encoder_state,
                output_layer
            )

            training_decoder_output_seq, _, _ = tf.contrib.seq2seq.dynamic_decode(
                training_decoder,
                impute_finished=True,
                maximum_iterations=self.max_length
            )

        with tf.variable_scope("decode", reuse=True):
            start_tokens = tf.tile(
                tf.constant([self.src_dictionary[('<s>', 'None', 'None')]],
                            dtype=tf.int32),
                [self.BATCH_SIZE],
                name='start_tokens')

            # Helper for the inference process.
            inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=self.embedding_matrix,
                start_tokens=start_tokens,
                end_token=self.src_dictionary[('</s>', 'None', 'None')]
            )

            # Basic decoder
            inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                decoder_cell,
                inference_helper,
                encoder_state,
                output_layer
            )

            # Perform dynamic decoding using the decoder
            inference_decoder_output_seq, _, _ = tf.contrib.seq2seq.dynamic_decode(
                inference_decoder,
                impute_finished=True,
                maximum_iterations=self.max_length
            )

        training_logits = tf.identity(training_decoder_output_seq.rnn_output, name='logits')
        inference_logits = tf.identity(inference_decoder_output_seq.sample_id, name='predictions')

        # Create the weights for sequence_loss
        masks = tf.sequence_mask(
            self.decoder_seq_len,
            self.max_length,
            dtype=tf.float64,
            name='masks'
        )

        self.cost = tf.contrib.seq2seq.sequence_loss(
            training_logits,
            self.decoder_output_seq,
            masks
        )

        optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        self.train_pred = training_decoder_output_seq.sample_id

        gradients = optimizer.compute_gradients(self.cost)
        capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var)
                            for grad, var in gradients if grad is not None]
        self.train_op = optimizer.apply_gradients(capped_gradients)
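        # Note: clip_by_value caps each gradient element at [-5, 5] independently,
        # while the clip_by_global_norm used in the earlier examples rescales all
        # gradients jointly, preserving their direction.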
Beispiel #24
0
    def build_latent_space(self):
        with tf.name_scope("latent_space"):
            self.z_mean = Dense(self.latent_dim, name='z_mean')(self.h_N)
            self.z_log_sigma = Dense(self.latent_dim, name='z_log_sigma')(self.h_N)

            self.z_vector = tf.identity(self.sample_gaussian(), name='z_vector')
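
sample_gaussian() is referenced above but not shown in this snippet; a minimal sketch of the usual reparameterization trick, consistent with the z_mean / z_log_sigma layers (an assumption, not the original implementation):

    def sample_gaussian(self):
        # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, I).
        # Assumes z_log_sigma stores log(sigma^2), as its name suggests.
        eps = tf.random_normal(tf.shape(self.z_mean), dtype=tf.float32)
        return self.z_mean + tf.exp(0.5 * self.z_log_sigma) * eps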
Beispiel #25
0
    [
        4.7, 3.2, 1.3, 0.2, 5.1, 3.5, 1.4, 0.2, 5.1, 3.5, 1.4, 0.2, 5.1, 3.5,
        1.4, 0.2, 5.1, 3.5, 1.4, 0.2, 5.1, 3.5, 1.4, 0.2, 5.1, 3.5, 1.4, 0.2,
        5.1, 3.5
    ],
    [
        4.6, 3.1, 1.5, 0.2, 5.1, 3.5, 1.4, 0.2, 5.1, 3.5, 1.4, 0.2, 5.1, 3.5,
        1.4, 0.2, 5.1, 3.5, 1.4, 0.2, 5.1, 3.5, 1.4, 0.2, 5.1, 3.5, 1.4, 0.2,
        5.1, 3.5
    ],
])
labels = np.array([0, 1, 0, 1])
print('------A. Training------')

model = Sequential()
layer1 = Dense(200, activation='relu', input_dim=30)
model.add(layer1)
layer2 = Dense(200, activation='relu')
model.add(layer2)
layer3 = Dense(3, activation='softmax')
model.add(layer3)
model.compile('adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
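# (Alternative: keep loss='categorical_crossentropy' and one-hot encode the
# integer labels first, e.g. keras.utils.to_categorical(labels, num_classes=3).)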

# predefined multiclass dataset
train_output = model.fit(data, labels, batch_size=20, epochs=5)

print('---------------------')
print(train_output.history)

print('------B. Evaluation------')
# predefined eval dataset
Beispiel #26
0
                for a in range(SIZE_RNN_LAYER):
                    cell = rnn.BasicLSTMCell(SIZE_RNN_STATE)
                    cell = rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
                    cell_decode.append(cell)
                multi_rnn_decode = rnn.MultiRNNCell(cell_decode, state_is_tuple=True)

                dec_cell = tf.contrib.seq2seq.AttentionWrapper(
                    cell=multi_rnn_decode,
                    attention_mechanism=attn_luong,
                    attention_layer_size=SIZE_ATTN,
                    name="attention_wrapper")

                initial_state = dec_cell.zero_state(dtype=tf.float32, batch_size=batch_size)
                initial_state = initial_state.clone(cell_state=state_enc)

                output_layer = Dense(
                    voc_size_kor,
                    kernel_initializer=tf.truncated_normal_initializer(
                        mean=0.0, stddev=0.1))

            # train mode
            with tf.variable_scope("decoder_layer"):
                train_helper = tf.contrib.seq2seq.TrainingHelper(inputs=embed_dec,
                                                                 sequence_length=dec_pad_len,
                                                                 time_major=False)
                train_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, train_helper, initial_state, output_layer)

                output_train_dec, state_train_dec, len_train_dec = tf.contrib.seq2seq.dynamic_decode(
                    decoder=train_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=padded_kor_len)

            # predict mode
Beispiel #27
0
    def __init__(self, lstm_size, lstm_layers, source_vocab_size,
                 enc_embedding_size, tgt_word_to_int, dec_embedding_size,
                 tgt_max_length):

        #-----------------------------------------------------------------------
        # Placeholders
        #-----------------------------------------------------------------------
        self.inputs = tf.placeholder(tf.int32, [None, None], name='inputs')
        self.targets = tf.placeholder(tf.int32, [None, None], name='targets')
        self.batch_size = tf.placeholder(tf.int32, [], name='batch_size')
        self.tgt_seq_length = tf.placeholder(tf.int32, [None],
                                             name='tgt_seq_length')
        self.src_seq_length = tf.placeholder(tf.int32, [None],
                                             name='src_seq_length')

        #-----------------------------------------------------------------------
        # Encoder
        #-----------------------------------------------------------------------
        with tf.variable_scope('encoder'):
            with tf.variable_scope('embedding'):
                enc_embed = tf.contrib.layers.embed_sequence(
                    self.inputs, source_vocab_size, enc_embedding_size)
            with tf.variable_scope('rnn'):
                enc_cell = tf.contrib.rnn.MultiRNNCell(
                         [tf.contrib.rnn.BasicLSTMCell(lstm_size) \
                          for _ in range(lstm_layers)])

            self.initial_state = enc_cell.zero_state(self.batch_size,
                                                     tf.float32)

            _, self.enc_state = tf.nn.dynamic_rnn(
                enc_cell,
                enc_embed,
                sequence_length=self.src_seq_length,
                initial_state=self.initial_state)

        #-----------------------------------------------------------------------
        # Decoder
        #-----------------------------------------------------------------------
        target_vocab_size = len(tgt_word_to_int)
        with tf.variable_scope('decoder'):

            #-------------------------------------------------------------------
            # Embedding
            #-------------------------------------------------------------------
            with tf.variable_scope('embedding'):
                self.dec_embed = tf.Variable(
                    tf.random_uniform([target_vocab_size, dec_embedding_size]))

            #-------------------------------------------------------------------
            # Final classifier
            #-------------------------------------------------------------------
            with tf.variable_scope('classifier'):
                self.output_layer = Dense(
                    target_vocab_size,
                    kernel_initializer=tf.truncated_normal_initializer(
                        mean=0.0, stddev=0.1))

            #-------------------------------------------------------------------
            # RNN
            #-------------------------------------------------------------------
            with tf.variable_scope('rnn'):
                self.dec_cell = tf.contrib.rnn.MultiRNNCell(
                                  [tf.contrib.rnn.BasicLSTMCell(lstm_size) \
                                   for _ in range(lstm_layers)])

            #-------------------------------------------------------------------
            # Inference decoder
            #-------------------------------------------------------------------
            with tf.variable_scope('decoder'):
                start_tokens = tf.tile([tgt_word_to_int['<s>']],
                                       [self.batch_size])

                helper = seq2seq.GreedyEmbeddingHelper(self.dec_embed,
                                                       start_tokens,
                                                       tgt_word_to_int['</s>'])

                decoder = seq2seq.BasicDecoder(self.dec_cell, helper,
                                               self.enc_state,
                                               self.output_layer)
                outputs, _, _ = seq2seq.dynamic_decode(
                    decoder,
                    impute_finished=True,
                    maximum_iterations=tgt_max_length)

        self.outputs = tf.identity(outputs.sample_id, 'predictions')
Beispiel #28
0
def decoding_layer(phonem_dict, decoding_embedding_size, num_layers, rnn_size,
                   target_sequence_length, max_target_sequence_length,
                   encoder_state, decoder_input):
    '''
    Build the decoder layer.
    Parameters:
    - phonem_dict: mapping table for the target data
    - decoding_embedding_size: size of the embedding vectors
    - num_layers: number of stacked RNN cells
    - rnn_size: number of hidden units in each RNN cell
    - target_sequence_length: lengths of the target sequences
    - max_target_sequence_length: maximum length of the target sequences
    - encoder_state: the state vector produced by the encoder
    - decoder_input: input to the decoder
    '''

    # 1. Embedding
    target_vocab_size = len(phonem_dict)
    decoder_embeddings = tf.Variable(
        tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    decoder_embed_input = tf.nn.embedding_lookup(decoder_embeddings,
                                                 decoder_input)

    # 2. Build the RNN cell for the decoder
    def get_decoder_cell(rnn_size):
        decoder_cell = tf.contrib.rnn.LSTMCell(
            rnn_size,
            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        return decoder_cell

    cell = tf.contrib.rnn.MultiRNNCell(
        [get_decoder_cell(rnn_size) for _ in range(num_layers)])

    # 3. Fully-connected output layer
    # target_vocab_size defines the size of the output layer
    output_layer = Dense(target_vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(
                             mean=0.1, stddev=0.1))

    # 4. Training decoder
    with tf.variable_scope("decode"):
        training_helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoder_embed_input,
            sequence_length=target_sequence_length,
            time_major=False)

        training_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell, training_helper, encoder_state, output_layer)
        training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            training_decoder,
            impute_finished=True,
            maximum_iterations=max_target_sequence_length)

    # 5. Predicting decoder
    # Shares parameters with the training decoder

    with tf.variable_scope("decode", reuse=True):
        # Create a constant tensor and tile it up to batch_size
        start_tokens = tf.tile(tf.constant([phonem_dict['_sos_']],
                                           dtype=tf.int32),
                               [tf.shape(target_sequence_length)[0]],
                               name='start_token')
        predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            decoder_embeddings, start_tokens, phonem_dict['_eos_'])

        predicting_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell, predicting_helper, encoder_state, output_layer)
        predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            predicting_decoder,
            impute_finished=True,
            maximum_iterations=max_target_sequence_length)

    return training_decoder_output, predicting_decoder_output
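
A hedged wiring example for decoding_layer (assumed names for the surrounding graph; training logits come from .rnn_output, greedy predictions from .sample_id):

# Hypothetical usage; encoder_state and decoder_input come from the rest of the graph.
train_out, predict_out = decoding_layer(
    phonem_dict, decoding_embedding_size=128, num_layers=2, rnn_size=256,
    target_sequence_length=target_len,
    max_target_sequence_length=max_target_len,
    encoder_state=encoder_state, decoder_input=decoder_input)
training_logits = tf.identity(train_out.rnn_output, name='logits')
predictions = tf.identity(predict_out.sample_id, name='predictions')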

def decoding_layer(target_letter_to_int, decoding_embedding_size, num_layers,
                   rnn_size, target_sequence_length,
                   max_target_sequence_length, enc_state, dec_input):
    # 1. Decoder Embedding
    target_vocab_size = len(target_letter_to_int)
    dec_embeddings = tf.Variable(
        tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    # 2. Construct the decoder cell
    def make_cell(rnn_size):
        dec_cell = tf.contrib.rnn.LSTMCell(
            rnn_size,
            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        return dec_cell

    dec_cell = tf.contrib.rnn.MultiRNNCell(
        [make_cell(rnn_size) for _ in range(num_layers)])

    # 3. Dense layer to translate the decoder's output at each time
    # step into a choice from the target vocabulary
    output_layer = Dense(target_vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(
                             mean=0.0, stddev=0.1))

    # 4. Set up a training decoder and an inference decoder
    # Training Decoder
    with tf.variable_scope("decode"):
        # Helper for the training process. Used by BasicDecoder to read inputs.
        training_helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=dec_embed_input,
            sequence_length=target_sequence_length,
            time_major=False)

        # Basic decoder
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
            dec_cell, training_helper, enc_state, output_layer)

        # Perform dynamic decoding using the decoder
        training_decoder_output = tf.contrib.seq2seq.dynamic_decode(
            training_decoder,
            impute_finished=True,
            maximum_iterations=max_target_sequence_length)[0]
    # 5. Inference Decoder
    # Reuses the same parameters trained by the training process
    with tf.variable_scope("decode", reuse=True):
        start_tokens = tf.tile(tf.constant([target_letter_to_int['<GO>']],
                                           dtype=tf.int32), [batch_size],
                               name='start_tokens')

        # Helper for the inference process.
        inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            dec_embeddings, start_tokens, target_letter_to_int['<EOS>'])

        # Basic decoder
        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
            dec_cell, inference_helper, enc_state, output_layer)

        # Perform dynamic decoding using the decoder
        inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(
            inference_decoder,
            impute_finished=True,
            maximum_iterations=max_target_sequence_length)[0]

    return training_decoder_output, inference_decoder_output

    def model(self):

        # To be replaced with record input later
        inputs = tf.placeholder(dtype=tf.int32,
                                shape=(FLAGS.batch_size, FLAGS.en_max_length))
        targets = tf.placeholder(dtype=tf.int32,
                                 shape=(FLAGS.batch_size, FLAGS.zh_max_length))
        # GreedyEmbeddingHelper expects a vector of start ids, one per example
        start_tokens = tf.fill([FLAGS.batch_size], 0)
        end_token = tf.constant(0, dtype=tf.int32)
        en_len_sequence = tf.placeholder(dtype=tf.int32,
                                         shape=FLAGS.batch_size)
        zh_len_sequence = tf.placeholder(dtype=tf.int32,
                                         shape=FLAGS.batch_size,
                                         name='batch_seq_length')

        en_embedding_matrix = tf.get_variable(
            name='embedding_matrix',
            shape=(FLAGS.en_vocab_size, FLAGS.en_embedded_size),
            dtype=tf.float32,
            # regularizer=tf.nn.l2_loss,
            initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
        zh_embedding_matrix = tf.get_variable(
            name='zh_embedding_matrix',
            shape=(FLAGS.zh_vocab_size, FLAGS.zh_embedded_size),
            dtype=tf.float32,
            # regularizer=tf.nn.l2_loss,
            initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))

        tf.add_to_collection(tf.GraphKeys.LOSSES,
                             tf.nn.l2_loss(en_embedding_matrix))
        tf.add_to_collection(tf.GraphKeys.LOSSES,
                             tf.nn.l2_loss(zh_embedding_matrix))

        tf.summary.histogram('zh_embedding_matrix', zh_embedding_matrix)
        tf.summary.histogram('en_embedding_matrix', en_embedding_matrix)
        with tf.device('/cpu:0'):
            embedded = tf.nn.embedding_lookup(en_embedding_matrix, inputs)
            target_embedded = tf.nn.embedding_lookup(zh_embedding_matrix,
                                                     targets)

        with tf.name_scope("encoder"):
            cells_fw = [
                tf.contrib.rnn.GRUCell(num) for num in config.encoder_fw_units
            ]
            cells_bw = [
                tf.contrib.rnn.GRUCell(num) for num in config.encoder_bw_units
            ]
            outputs, states_fw, states_bw = \
                tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw,
                                                               cells_bw,
                                                               embedded,
                                                               dtype=tf.float32,
                                                               sequence_length=en_len_sequence)

            dense_fw = tf.concat(states_fw, axis=1)
            dense_bw = tf.concat(states_bw, axis=1)
            states = tf.concat([dense_bw, dense_fw], axis=1)
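            # states_fw / states_bw hold one state tensor per layer; after the
            # concats, `states` is [batch, sum(bw_units) + sum(fw_units)].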

            tf.summary.histogram('encoder_state', states)
        '''
        external memory will be added here
        '''

        with tf.name_scope("decoder"):
            attention_m = \
                tf.contrib.seq2seq.BahdanauAttention(
                    FLAGS.attention_size,
                    outputs,
                    en_len_sequence)
            cell_out = [
                tf.contrib.rnn.GRUCell(num) for num in config.out_cell_units
            ]
            cell_attention = \
                [tf.contrib.seq2seq.AttentionWrapper(
                    cell_out[i], attention_m) for i in range(len(config.out_cell_units))]
            cells = tf.contrib.rnn.MultiRNNCell(cell_attention)

            initial_state = cells.zero_state(dtype=tf.float32,
                                             batch_size=FLAGS.batch_size)
            initial_state = list(initial_state)
            initial_state[0] = initial_state[0].clone(cell_state=states)
            initial_state = tuple(initial_state)
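            # Only the bottom AttentionWrapper layer is seeded with the encoder
            # summary `states`; the higher layers keep their zero states.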

            if FLAGS.is_inference:
                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    zh_embedding_matrix, start_tokens, end_token)
            else:
                helper = tf.contrib.seq2seq.TrainingHelper(
                    target_embedded, zh_len_sequence)

            dense = Dense(FLAGS.zh_vocab_size)
            decoder = tf.contrib.seq2seq.BasicDecoder(cells, helper,
                                                      initial_state, dense)
            final_outputs, final_states, final_sequence_lengths = \
                tf.contrib.seq2seq.dynamic_decode(decoder)
            # NOTE: every position gets weight 1.0; masking padded steps would
            # use tf.sequence_mask(zh_len_sequence, FLAGS.zh_max_length) instead.
            weights = tf.constant(
                1.0, shape=[FLAGS.batch_size, FLAGS.zh_max_length])
            inference_losses = tf.contrib.seq2seq.sequence_loss(
                final_outputs.rnn_output, targets, weights)
            tf.summary.scalar('inference_loss', inference_losses)
            tf.add_to_collection(tf.GraphKeys.LOSSES, inference_losses)
            losses = tf.add_n(tf.get_collection(tf.GraphKeys.LOSSES))
            tf.summary.scalar('losses', losses)
            accuracy = sequence_equal(final_outputs.sample_id, targets)
            tf.summary.scalar('eval', accuracy)

            global_step = tf.train.get_or_create_global_step()

            learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                       global_step,
                                                       FLAGS.decay_step,
                                                       FLAGS.decay_rate)
            tf.summary.scalar('learning_rate', learning_rate)

            opt = tf.train.GradientDescentOptimizer(learning_rate)

            grads_and_vars = opt.compute_gradients(losses)
            clipped_grads_and_vars = tf.contrib.training.clip_gradient_norms(
                grads_and_vars, FLAGS.max_gradient)
            apply_grads_op = opt.apply_gradients(clipped_grads_and_vars,
                                                 global_step)

            summary_op = tf.summary.merge_all()

            if FLAGS.is_inference:
                return final_outputs.sample_id
            elif FLAGS.is_train:
                return [global_step, accuracy, losses, apply_grads_op, summary_op]
            else:
                return [global_step, accuracy, losses]