Example #1
        def seq2seq_f(lstm_inputs, decoder_inputs, seq_length, do_decode):

            num_hidden = attn_num_layers * attn_num_hidden
            lstm_fw_cell = BasicLSTMCell(num_hidden, forget_bias=0.0, state_is_tuple=False)
            # Backward direction cell
            lstm_bw_cell = BasicLSTMCell(num_hidden, forget_bias=0.0, state_is_tuple=False)

            pre_encoder_inputs, output_state_fw, output_state_bw = tf.contrib.rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, lstm_inputs,
                initial_state_fw=None, initial_state_bw=None,
                dtype=tf.float32, sequence_length=None, scope=None)

            encoder_inputs = [e * f for e, f in zip(pre_encoder_inputs, encoder_masks[:seq_length])]
            top_states = [array_ops.reshape(e, [-1, 1, num_hidden*2])
                    for e in encoder_inputs]
            attention_states = array_ops.concat(top_states, 1)
            initial_state = tf.concat(axis=1, values=[output_state_fw, output_state_bw])
            outputs, _, attention_weights_history = embedding_attention_decoder(
                    decoder_inputs, initial_state, attention_states, cell,
                    num_symbols=target_vocab_size, 
                    embedding_size=target_embedding_size,
                    num_heads=1,
                    output_size=target_vocab_size, 
                    output_projection=None,
                    feed_previous=do_decode,
                    initial_state_attention=False,
                    attn_num_hidden=attn_num_hidden)
            return outputs, attention_weights_history
Example #2
    def __init__(self, config):
        self._config = config
        self._kernel_size = 3

        self._train_data, _ = self.get_video_data()
        self._nr_training_examples = self._train_data.shape[0]
        self._train_data = self.shuffle_train_data(self._train_data)

        self._word_to_index, self._index_to_word, self._bias_init_vector, self._caption_matrix, self._longest_caption = \
          self.get_caption_dicts(self._train_data)

        self._nr_words = len(self._word_to_index)

        self._W_emb = tf.get_variable(
            'W_emb',
            initializer=tf.random_uniform(
                [self._nr_words, self._config.dim_hidden], -0.1, 0.1))

        self._lstm = BasicLSTMCell(self._config.dim_hidden)

        self._encode_image_W = tf.get_variable(
            'encode_image_W',
            initializer=tf.random_uniform(
                [self._config.dim_video, self._config.dim_hidden], -0.1, 0.1))
        self._encode_image_b = tf.get_variable(
            'encode_image_b', initializer=tf.zeros([self._config.dim_hidden]))

        self._embed_word_W = tf.get_variable(
            'embed_word_W',
            initializer=tf.random_uniform(
                [self._config.dim_hidden, self._nr_words], -0.1, 0.1))
        self._embed_word_b = tf.get_variable(
            'embed_word_b', initializer=tf.zeros([self._nr_words]))

        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * self._config.learning_rate_decay_factor)
Example #3
    def define_basic_lstm_cell(self):
        #num_units = self.config.hidden_size * 2
        lstm_cell = BasicLSTMCell(self.config.hidden_size, forget_bias=0.0)
        if self.is_training and self.config.keep_prob < 1.0:
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=self.config.keep_prob)

        return tf.nn.rnn_cell.MultiRNNCell([lstm_cell] *
                                           self.config.num_layers)
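A usage sketch, not taken from the original project: model stands for an assumed instance of the class above, and inputs is assumed to be a [batch, time, hidden_size] float tensor.

# Hypothetical unrolling of the stacked cell returned by define_basic_lstm_cell
# (model, model.config.batch_size and inputs are assumed names).
cell = model.define_basic_lstm_cell()
initial_state = cell.zero_state(model.config.batch_size, tf.float32)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs,
                                         initial_state=initial_state)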
Example #4
class RecurrentController(BaseController):
    def network_vars(self):
        self.lstm_cell = BasicLSTMCell(256)
        self.state = self.lstm_cell.zero_state(self.batch_size, tf.float32)

    def network_op(self, X, state):
        X = tf.convert_to_tensor(X)
        return self.lstm_cell(X, state)

    def get_state(self):
        return self.state

    def update_state(self, new_state):
        return tf.no_op()
Example #5
def RNN(x, weights, biases):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    # x = tf.split(x, n_steps, 0)
    x = tf.split(0, n_steps, x)

    # Define a lstm cell with tensorflow
    lstm_cell = BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
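The comments above describe the reshaping contract; the short driver below sketches how RNN() might be wired up. All names and sizes here (the MNIST-style 28x28 setup, n_hidden=128, n_classes=10) are assumptions for illustration, not part of the original example.

# Hypothetical driver for RNN() above (all sizes assumed; same old-style TF API
# as in the example).
n_steps, n_input, n_hidden, n_classes = 28, 28, 128, 10

x = tf.placeholder(tf.float32, [None, n_steps, n_input])
weights = {'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}

pred = RNN(x, weights, biases)  # [batch_size, n_classes] logits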
Example #6
 def __init__(self, features, batch_size):
     self.cell = BasicLSTMCell(features)
     self.h = tf.Variable(tf.zeros([batch_size, features]), trainable=False)
     self.c = tf.Variable(tf.zeros([batch_size, features]), trainable=False)
     self.batch_size = batch_size
     self.features = features
Example #7
 def network_vars(self):
     self.lstm_cell = BasicLSTMCell(256)
     self.state = self.lstm_cell.zero_state(self.batch_size, tf.float32)
Example #8
    def __init__(self, encoder_masks, encoder_inputs_tensor, 
            decoder_inputs,
            target_weights,
            target_vocab_size, 
            buckets,
            target_embedding_size,
            attn_num_layers,
            attn_num_hidden,
            forward_only,
            use_gru):
        """Create the model.

        Args:
          source_vocab_size: size of the source vocabulary.
          target_vocab_size: size of the target vocabulary.
          buckets: a list of pairs (I, O), where I specifies maximum input length
            that will be processed in that bucket, and O specifies maximum output
            length. Training instances that have inputs longer than I or outputs
            longer than O will be pushed to the next bucket and padded accordingly.
            We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
          size: number of units in each layer of the model.
          num_layers: number of layers in the model.
          max_gradient_norm: gradients will be clipped to maximally this norm.
          learning_rate: learning rate to start with.
          learning_rate_decay_factor: decay learning rate by this much when needed.
          use_lstm: if true, we use LSTM cells instead of GRU cells.
          num_samples: number of samples for sampled softmax.
          forward_only: if set, we do not construct the backward pass in the model.
        """
        self.encoder_inputs_tensor = encoder_inputs_tensor
        self.decoder_inputs = decoder_inputs
        self.target_weights = target_weights
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.encoder_masks = encoder_masks

        # Create the internal multi-layer cell for our RNN.
        single_cell = BasicLSTMCell(attn_num_hidden, forget_bias=0.0, state_is_tuple=False)
        if use_gru:
            print("using GRU CELL in decoder")
            single_cell = GRUCell(attn_num_hidden)
        cell = single_cell

        if attn_num_layers > 1:
            cell = MultiRNNCell([single_cell] * attn_num_layers, state_is_tuple=False)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(lstm_inputs, decoder_inputs, seq_length, do_decode):

            num_hidden = attn_num_layers * attn_num_hidden
            lstm_fw_cell = BasicLSTMCell(num_hidden, forget_bias=0.0, state_is_tuple=False)
            # Backward direction cell
            lstm_bw_cell = BasicLSTMCell(num_hidden, forget_bias=0.0, state_is_tuple=False)

            pre_encoder_inputs, output_state_fw, output_state_bw = tf.contrib.rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, lstm_inputs,
                initial_state_fw=None, initial_state_bw=None,
                dtype=tf.float32, sequence_length=None, scope=None)

            encoder_inputs = [e * f for e, f in zip(pre_encoder_inputs, encoder_masks[:seq_length])]
            top_states = [array_ops.reshape(e, [-1, 1, num_hidden*2])
                    for e in encoder_inputs]
            attention_states = array_ops.concat(top_states, 1)
            initial_state = tf.concat(axis=1, values=[output_state_fw, output_state_bw])
            outputs, _, attention_weights_history = embedding_attention_decoder(
                    decoder_inputs, initial_state, attention_states, cell,
                    num_symbols=target_vocab_size, 
                    embedding_size=target_embedding_size,
                    num_heads=1,
                    output_size=target_vocab_size, 
                    output_projection=None,
                    feed_previous=do_decode,
                    initial_state_attention=False,
                    attn_num_hidden=attn_num_hidden)
            return outputs, attention_weights_history

        # Our targets are decoder inputs shifted by one.
        targets = [decoder_inputs[i + 1]
                for i in xrange(len(decoder_inputs) - 1)]

        softmax_loss_function = None # default to tf.nn.sparse_softmax_cross_entropy_with_logits

        # Training outputs and losses.
        if forward_only:
            self.outputs, self.losses, self.attention_weights_histories = model_with_buckets(
                    encoder_inputs_tensor, decoder_inputs, targets,
                    self.target_weights, buckets, lambda x, y, z: seq2seq_f(x, y, z, True),
                    softmax_loss_function=softmax_loss_function)
        else:
            self.outputs, self.losses, self.attention_weights_histories = model_with_buckets(
                    encoder_inputs_tensor, decoder_inputs, targets,
                    self.target_weights, buckets, lambda x, y, z: seq2seq_f(x, y, z, False),
                    softmax_loss_function=softmax_loss_function)
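The docstring above describes the bucket convention; the helper below is only an illustrative sketch of that convention and does not appear in the model code.

# Illustrative sketch of the bucketing rule described in the docstring:
# pick the smallest bucket whose (I, O) limits fit the example, then pad to it.
def pick_bucket(buckets, encoder_len, decoder_len):
    for bucket_id, (max_in, max_out) in enumerate(buckets):
        if encoder_len <= max_in and decoder_len <= max_out:
            return bucket_id
    raise ValueError("example is longer than the largest bucket")

# e.g. with buckets = [(2, 4), (8, 16)], a 5-token input with a 3-token
# output falls into bucket 1 and is padded to lengths (8, 16).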
Example #9
for i in range(len(Questions_Types)):
    Questions_Types_embedding.append(
        embeddings_index[Questions_Types[i].lower()])

# Question Type Encoder
# Pass the question-type embeddings e_ti through a fully connected network
# to obtain the internal vector representation qt_i.
Topic_internal_vector = FC(Questions_Types_embedding, 7)

from tensorflow.python.ops.rnn_cell import BasicLSTMCell

# Number of hidden units.
encoder_hidden_units = 600
decoder_hidden_units = 600

# BLSTM Encoder encoding the Question Topic
forward_topics_encoder_cell = BasicLSTMCell(encoder_hidden_units)
backward_topics_encoder_cell = BasicLSTMCell(encoder_hidden_units)

(
    (
        encoder_topics_fw_outputs,  # Contains the outputs of the BLSTM.
        encoder_topics_bw_outputs),
    (
        encoder_topics_fw_final_state,  # Contains the last hidden state of the BLSTM.
        encoder_topics_bw_final_state)) = (
            tf.nn.bidirectional_dynamic_rnn(
                cell_fw=forward_topics_encoder_cell,
                cell_bw=backward_topics_encoder_cell,
                inputs=Topics,  # Topics
                dtype=tf.float32,
                time_major=True))
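The comments above note that the second tuple holds the final hidden states of both directions; a natural next step, sketched here as an assumption rather than taken from the original script, is to join them into a single topic representation.

# Sketch (assumed follow-up): concatenate the forward/backward final hidden
# states into one 2 * encoder_hidden_units representation per example.
encoder_topics_final_state_h = tf.concat(
    (encoder_topics_fw_final_state.h, encoder_topics_bw_final_state.h), 1)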
Example #10
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, dc, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, \
            config.hidden_size, config.char_emb_size, config.max_word_size
        H = config.max_tree_height

        x_mask = self.x > 0
        q_mask = self.q > 0
        tx_mask = self.tx > 0  # [N, M, H, JX]

        with tf.variable_scope("char_emb"):
            char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')
            Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, M, JX, W, dc]
            Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq)  # [N, JQ, W, dc]

            filter = tf.get_variable("filter", shape=[1, config.char_filter_height, dc, d], dtype='float')
            bias = tf.get_variable("bias", shape=[d], dtype='float')
            strides = [1, 1, 1, 1]
            Acx = tf.reshape(Acx, [-1, JX, W, dc])
            Acq = tf.reshape(Acq, [-1, JQ, W, dc])
            xxc = tf.nn.conv2d(Acx, filter, strides, "VALID") + bias  # [N*M, JX, W/filter_stride, d]
            qqc = tf.nn.conv2d(Acq, filter, strides, "VALID") + bias  # [N, JQ, W/filter_stride, d]
            xxc = tf.reshape(tf.reduce_max(tf.nn.relu(xxc), 2), [-1, M, JX, d])
            qqc = tf.reshape(tf.reduce_max(tf.nn.relu(qqc), 2), [-1, JQ, d])

        with tf.variable_scope("word_emb"):
            if config.mode == 'train':
                word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, config.word_emb_size], initializer=get_initializer(config.emb_mat))
            else:
                word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, config.word_emb_size], dtype='float')
            Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, M, JX, d]
            Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)  # [N, JQ, d]
            # Ax = linear([Ax], d, False, scope='Ax_reshape')
            # Aq = linear([Aq], d, False, scope='Aq_reshape')

        xx = tf.concat(3, [xxc, Ax])  # [N, M, JX, 2d]
        qq = tf.concat(2, [qqc, Aq])  # [N, JQ, 2d]
        D = d + config.word_emb_size

        with tf.variable_scope("pos_emb"):
            pos_emb_mat = tf.get_variable("pos_emb_mat", shape=[config.pos_vocab_size, d], dtype='float')
            Atx = tf.nn.embedding_lookup(pos_emb_mat, self.tx)  # [N, M, H, JX, d]

        cell = BasicLSTMCell(D, state_is_tuple=True)
        cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("rnn"):
            (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='start')  # [N, M, JX, 2d]
            tf.get_variable_scope().reuse_variables()
            (fw_us, bw_us), (_, (fw_u, bw_u)) = bidirectional_dynamic_rnn(cell, cell, qq, q_len, dtype='float', scope='start')  # [N, J, d], [N, d]
            u = (fw_u + bw_u) / 2.0
            h = (fw_h + bw_h) / 2.0

        with tf.variable_scope("h"):
            no_op_cell = NoOpCell(D)
            tree_rnn_cell = TreeRNNCell(no_op_cell, d, tf.reduce_max)
            initial_state = tf.reshape(h, [N*M*JX, D])  # [N*M*JX, D]
            inputs = tf.concat(4, [Atx, tf.cast(self.tx_edge_mask, 'float')])  # [N, M, H, JX, d+JX]
            inputs = tf.reshape(tf.transpose(inputs, [0, 1, 3, 2, 4]), [N*M*JX, H, d + JX])  # [N*M*JX, H, d+JX]
            length = tf.reshape(tf.reduce_sum(tf.cast(tx_mask, 'int32'), 2), [N*M*JX])
            # length = tf.reshape(tf.reduce_sum(tf.cast(tf.transpose(tx_mask, [0, 1, 3, 2]), 'float'), 3), [-1])
            h, _ = dynamic_rnn(tree_rnn_cell, inputs, length, initial_state=initial_state)  # [N*M*JX, H, D]
            h = tf.transpose(tf.reshape(h, [N, M, JX, H, D]), [0, 1, 3, 2, 4])  # [N, M, H, JX, D]

        u = tf.expand_dims(tf.expand_dims(tf.expand_dims(u, 1), 1), 1)  # [N, 1, 1, 1, 4d]
        dot = linear(h * u, 1, True, squeeze=True, scope='dot')  # [N, M, H, JX]
        # self.logits = tf.reshape(dot, [N, M * H * JX])
        self.logits = tf.reshape(exp_mask(dot, tx_mask), [N, M * H * JX])  # [N, M, H, JX]
        self.yp = tf.reshape(tf.nn.softmax(self.logits), [N, M, H, JX])
Example #11
 def __init__(self, kwd_voc_size, *args, **kwargs):
     BasicLSTMCell.__init__(self, *args, **kwargs)
     self.key_words_voc_size = kwd_voc_size
Example #12
in_onehot = tf.one_hot(in_ph, vocab_size, name="input_onehot")

inputs = tf.split(in_onehot, sequence_length, axis=1)
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
targets = tf.split(targ_ph, sequence_length, axis=1)

# at this point, inputs is a list of length sequence_length
# each element of inputs is [batch_size,vocab_size]

# targets is a list of length sequence_length
# each element of targets is a 1D vector of length batch_size

# ------------------
# YOUR COMPUTATION GRAPH HERE

cell0 = BasicLSTMCell(state_dim)
cell1 = BasicLSTMCell(state_dim)
multi_cell = tf.contrib.rnn.MultiRNNCell([cell0, cell1])
initial_state = multi_cell.zero_state(batch_size, tf.float32)
#######################################################################
#and add an encoder for the decoder...
# rnn_decoder returns (list of per-step outputs, final state); keep both.
seq_out, final_state = tf.contrib.legacy_seq2seq.rnn_decoder(inputs, initial_state,
                                                             multi_cell)
# Stack the per-step outputs into a tensor before the output layer
# (the hard-coded 50/50/128 presumably match sequence_length, batch_size
# and state_dim in this script).
seq_out = tf.reshape(seq_out, (50, 50, 128, 1))
initializer = tf.contrib.layers.variance_scaling_initializer()
logits = tf.contrib.layers.fully_connected(seq_out,
                                           vocab_size,
                                           activation_fn=None,
                                           weights_initializer=initializer,
                                           biases_initializer=initializer)
#logits =
Example #13
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            0, [word_emb_mat, self.new_emb_mat])
                    print(word_emb_mat.get_shape().as_list())

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq

                xx = Ax
                qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        cell = BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(2, [fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0, p1 = attention_layer(config,
                                         self.is_train,
                                         h,
                                         u,
                                         h_mask=self.x_mask,
                                         u_mask=self.q_mask,
                                         scope="p0",
                                         tensor_dict=self.tensor_dict)
                first_cell = d_cell
        # with tf.variable_scope("activate"):
        #     p0 =  tf.nn.relu(_linear(tf.reshape(p0,[-1,1200]),300,bias=0.01,bias_start=0.0,scope='relu'))
        #     if config.share_lstm_weights:
        #         tf.get_variable_scope().reuse_variables()
        #         p1 =  tf.nn.relu(_linear(tf.reshape(p1,[-1,1200]),300,bias=0.01,bias_start=0.0,scope='relu'))
        with tf.variable_scope('two_lstm'):
            p0 = tf.reshape(p0, [N, 1, -1, 300])
            p1 = tf.reshape(p1, [N, 1, -1, 300])
            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(3, [fw_g0, bw_g0])
            q_len_new = tf.tile(tf.expand_dims(q_len, 1), [1, M])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                    first_cell,
                    first_cell,
                    p1,
                    q_len_new,
                    dtype='float',
                    scope='g0')  # [N, M, JX, 2d]
                g1 = tf.concat(3, [fw_g1, bw_g1])
        # with tf.variable_scope('two_lstm_1'):
        #     (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(first_cell, first_cell, g0, x_len, dtype='float', scope='g0')  # [N, M, JX, 2d]
        #     g2 = tf.concat(3, [fw_g2, bw_g2])
        #     q_len_new = tf.tile(tf.expand_dims(q_len,1),[1,M])
        #     if config.share_lstm_weights:
        #         tf.get_variable_scope().reuse_variables()
        #         (fw_g3, bw_g3), _ = bidirectional_dynamic_rnn(first_cell, first_cell, g1, q_len_new, dtype='float', scope='g0')  # [N, M, JX, 2d]
        #         g3 = tf.concat(3, [fw_g3, bw_g3])

            g0 = tf.reduce_sum(tf.reduce_max(g0, 2), 1)
            g1 = tf.reduce_sum(tf.reduce_max(g1, 2), 1)

        logits = _linear([g0, g1, tf.abs(tf.subtract(g0, g1)), g0 * g1],
                         2,
                         bias=0.01,
                         bias_start=0.0,
                         scope='logits1')

        flat_logits2 = tf.reshape(logits, [N, 2])

        yp = tf.nn.softmax(flat_logits2)  # [-1, M*JX]

        self.tensor_dict['g0'] = g0
        self.tensor_dict['g1'] = g1

        self.logits = flat_logits2

        self.yp = yp
Example #14
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            0, [word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(3, [xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(2, [qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell = BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(2, [fw_u, bw_u])
            if config.two_prepro_layers:
                (fw_u, bw_u), ((_, fw_u_f),
                               (_, bw_u_f)) = bidirectional_dynamic_rnn(
                                   d_cell,
                                   d_cell,
                                   u,
                                   q_len,
                                   dtype='float',
                                   scope='u2')  # [N, J, d], [N, d]
                u = tf.concat(2, [fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
                if config.two_prepro_layers:
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                        cell, cell, h, x_len, dtype='float',
                        scope='u2')  # [N, M, JX, 2d]
                    h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]

            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
                if config.two_prepro_layers:
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                        cell, cell, h, x_len, dtype='float',
                        scope='h2')  # [N, M, JX, 2d]
                    h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell = d_cell
            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(3, [fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, g0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(3, [fw_g1, bw_g1])

            if config.late:
                (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                    d_cell,
                    d_cell,
                    tf.concat(3, [g1, p0]),
                    x_len,
                    dtype='float',
                    scope='g2')  # [N, M, JX, 2d]
                g2 = tf.concat(3, [fw_g2, bw_g2])
                # logits2 = u_logits(config, self.is_train, tf.concat(3, [g1, a1i]), u, h_mask=self.x_mask, u_mask=self.q_mask, scope="logits2")

                logits = get_logits([g1, g2, p0],
                                    d,
                                    True,
                                    wd=config.wd,
                                    input_keep_prob=config.input_keep_prob,
                                    mask=self.x_mask,
                                    is_train=self.is_train,
                                    func=config.answer_func,
                                    scope='logits1')

                if config.feed_gt:
                    logy = tf.log(tf.cast(self.y, 'float') + VERY_SMALL_NUMBER)
                    logits = tf.cond(self.is_train, lambda: logy,
                                     lambda: logits)
                if config.feed_hard:
                    hard_yp = tf.argmax(tf.reshape(logits, [N, M * JX]), 1)
                    hard_logits = tf.reshape(tf.one_hot(hard_yp, M * JX),
                                             [N, M, JX])  # [N, M, JX]
                    logits = tf.cond(self.is_train, lambda: logits,
                                     lambda: hard_logits)

                flat_logits = tf.reshape(logits, [-1, M * JX])
                flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
                yp = tf.reshape(flat_yp, [-1, M, JX])

                logits2 = get_logits([g1, g2, p0],
                                     d,
                                     True,
                                     wd=config.wd,
                                     input_keep_prob=config.input_keep_prob,
                                     mask=self.x_mask,
                                     is_train=self.is_train,
                                     func=config.answer_func,
                                     scope='logits2')

                flat_logits2 = tf.reshape(logits2, [-1, M * JX])
                flat_yp2 = tf.nn.softmax(flat_logits2)
                yp2 = tf.reshape(flat_yp2, [-1, M, JX])
            else:
                logits = get_logits([g1, p0],
                                    d,
                                    True,
                                    wd=config.wd,
                                    input_keep_prob=config.input_keep_prob,
                                    mask=self.x_mask,
                                    is_train=self.is_train,
                                    func=config.answer_func,
                                    scope='logits1')
                a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                              tf.reshape(logits, [N, M * JX]))

                if config.feed_gt:
                    logy = tf.log(tf.cast(self.y, 'float') + VERY_SMALL_NUMBER)
                    logits = tf.cond(self.is_train, lambda: logy,
                                     lambda: logits)
                if config.feed_hard:
                    hard_yp = tf.argmax(tf.reshape(logits, [N, M * JX]), 1)
                    hard_logits = tf.reshape(tf.one_hot(hard_yp, M * JX),
                                             [N, M, JX])  # [N, M, JX]
                    logits = tf.cond(self.is_train, lambda: logits,
                                     lambda: hard_logits)

                flat_logits = tf.reshape(logits, [-1, M * JX])
                flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
                yp = tf.reshape(flat_yp, [-1, M, JX])

                a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                              [1, M, JX, 1])
                yp_aug = tf.expand_dims(yp, -1)
                g1yp = g1 * yp_aug
                if config.prev_mode == 'a':
                    prev = a1i
                elif config.prev_mode == 'y':
                    prev = yp_aug
                elif config.prev_mode == 'gy':
                    prev = g1yp
                else:
                    raise Exception()
                (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                    d_cell,
                    d_cell,
                    tf.concat(3, [p0, g1, prev, g1 * prev]),
                    x_len,
                    dtype='float',
                    scope='g2')  # [N, M, JX, 2d]
                g2 = tf.concat(3, [fw_g2, bw_g2])
                # logits2 = u_logits(config, self.is_train, tf.concat(3, [g1, a1i]), u, h_mask=self.x_mask, u_mask=self.q_mask, scope="logits2")
                logits2 = get_logits([g2, p0],
                                     d,
                                     True,
                                     wd=config.wd,
                                     input_keep_prob=config.input_keep_prob,
                                     mask=self.x_mask,
                                     is_train=self.is_train,
                                     func=config.answer_func,
                                     scope='logits2')

                flat_logits2 = tf.reshape(logits2, [-1, M * JX])
                flat_yp2 = tf.nn.softmax(flat_logits2)
                yp2 = tf.reshape(flat_yp2, [-1, M, JX])

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
Example #15
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
targets = tf.split(1, sequence_length, targ_ph)

# at this point, inputs is a list of length sequence_length
# each element of inputs is [batch_size,vocab_size]

# targets is a list of length sequence_length
# each element of targets is a 1D vector of length batch_size

# ------------------
# YOUR COMPUTATION GRAPH HERE

with tf.variable_scope("Graph_") as scope:

    # create a BasicLSTMCell
    cell = BasicLSTMCell(state_dim, state_is_tuple=True)
    #   use it to create a MultiRNNCell
    #tf.nn.rnn_cell.MultiRNNCell.__init__(cells, state_is_tuple=False)
    stacked_lstm = tf.nn.rnn_cell.MultiRNNCell([cell] * 2, state_is_tuple=True)
    #stacked_lstm = tf.nn.rnn_cell.MultiRNNCell([cell]*2)
    #   use it to create an initial_state
    initial_state = stacked_lstm.zero_state(batch_size, tf.float32)
    #note that initial_state will be a *list* of tensors!

    # call seq2seq.rnn_decoder
    # rnn_decoder(decoder_inputs, initial_state, cell, loop_function=None, scope=None):
    #outputs, state = tf.nn.seq2seq.rnn_decoder(inputs, initial_state, stacked_lstm, loop_function=None, scope=None)
    outputs, state = tf.nn.seq2seq.rnn_decoder(inputs,
                                               initial_state,
                                               stacked_lstm,
                                               loop_function=None,
                                               scope=None)
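A possible continuation, sketched under assumed names (state_dim and vocab_size as defined earlier in this script), projects each per-step decoder output onto the vocabulary.

    # Sketch (assumed continuation, not part of the original snippet):
    # map every per-step decoder output to vocabulary logits.
    W_out = tf.get_variable("W_out", [state_dim, vocab_size])
    b_out = tf.get_variable("b_out", [vocab_size])
    logits = [tf.matmul(step_out, W_out) + b_out for step_out in outputs]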
Example #16
    def __init__(self,
                 source_vocab_size,
                 target_vocab_size,
                 buckets,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 use_lstm=True,
                 num_samples=512,
                 forward_only=False):
        """Create the model.

    Args:
      source_vocab_size: size of the source vocabulary.
      target_vocab_size: size of the target vocabulary.
      buckets: a list of pairs (I, O), where I specifies maximum input length
        that will be processed in that bucket, and O specifies maximum output
        length. Training instances that have inputs longer than I or outputs
        longer than O will be pushed to the next bucket and padded accordingly.
        We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
      size: number of units in each layer of the model.
      num_layers: number of layers in the model.
      max_gradient_norm: gradients will be clipped to maximally this norm.
      batch_size: the size of the batches used during training;
        the model construction is independent of batch_size, so it can be
        changed after initialization if this is convenient, e.g., for decoding.
      learning_rate: learning rate to start with.
      learning_rate_decay_factor: decay learning rate by this much when needed.
      use_lstm: if true, we use LSTM cells instead of GRU cells.
      num_samples: number of samples for sampled softmax.
      forward_only: if set, we do not construct the backward pass in the model.
    """
        self.source_vocab_size = source_vocab_size
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.target_vocab_size:
            w = tf.get_variable("proj_w", [size, self.target_vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable("proj_b", [self.target_vocab_size])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples,
                                                  self.target_vocab_size)

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        output_keep_prob = tf.constant(0.8)
        single_cell = GRUCell(size)
        # Add dropout layer for regularization.
        single_cell = DropoutWrapper(single_cell,
                                     output_keep_prob=output_keep_prob)
        if use_lstm:
            single_cell = BasicLSTMCell(size)
            single_cell = DropoutWrapper(single_cell,
                                         output_keep_prob=output_keep_prob)
        cell = single_cell
        if num_layers > 1:
            cell = MultiRNNCell([single_cell] * num_layers)

        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return tf.nn.seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=size,
                output_projection=output_projection,
                feed_previous=do_decode)

        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))
        for i in xrange(buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name="weight{0}".format(i)))

        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        if forward_only:
            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, True),
                softmax_loss_function=softmax_loss_function)

            if output_projection is not None:
                for b in xrange(len(buckets)):
                    self.outputs[b] = [
                        tf.matmul(output, output_projection[0]) +
                        output_projection[1] for output in self.outputs[b]
                    ]
        else:
            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, False),
                softmax_loss_function=softmax_loss_function)

        params = tf.trainable_variables()
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            for b in xrange(len(buckets)):
                gradients = tf.gradients(self.losses[b], params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, params),
                                        global_step=self.global_step))

        self.saver = tf.train.Saver(tf.all_variables())
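For context, the constructor above might be invoked as sketched below; the enclosing class name (Seq2SeqModel) and every hyperparameter value are assumptions for illustration only.

# Hypothetical instantiation (class name and all values are assumed).
model = Seq2SeqModel(source_vocab_size=40000,
                     target_vocab_size=40000,
                     buckets=[(5, 10), (10, 15), (20, 25), (40, 50)],
                     size=512,
                     num_layers=2,
                     max_gradient_norm=5.0,
                     batch_size=64,
                     learning_rate=0.5,
                     learning_rate_decay_factor=0.99,
                     use_lstm=True,
                     num_samples=512,
                     forward_only=False)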
Example #17
    def _build_forward(self):
        config = self.config

        N = config.batch_size
        M = config.max_num_sents
        JX = config.max_sent_size
        JQ = config.max_ques_size
        VW = config.word_vocab_size
        VC = config.char_vocab_size
        W = config.max_word_size
        d = config.hidden_size

        JX = tf.shape(self.x)[2]  # JX is the max sentence length
        JQ = tf.shape(self.q)[1]  # JQ is the max question length
        M = tf.shape(self.x)[1]  # M is the max number of sentences
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size
        # dc = 8: each char is mapped to an 8-number vector ("char-level word embedding size [100]")
        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')
                    # [330, 8]: a matrix mapping each char to its 8-number vector

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)
                    # [N, M, JX, W, dc] = [60, None, None, 16, 8]
                    # N is the batch size
                    # M the max number of sentences
                    # JX is the max sentence length
                    # W is  the max length of a word
                    # dc is the vector for each char
                    # map each char to a vector

                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    # JQ is the max question length
                    # W is the max word length
                    # map each char in the question to a vector

                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])
                    # max questions size, length, max_word_size(16), char_emb_size(8)

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    # here there are 100 filters, each of height 5
                    # (in general there can be several filter heights with different counts, but here just 100 height-5 filters)

                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(
                            qq, [-1, JQ, dco])  # xx and qq are the CNN outputs

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:  # create a new word embedding or use the glove?
                        word_emb_mat = tf.concat(
                            [word_emb_mat, self.new_emb_mat], 0)

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
                    qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq  # CNN + word embedding together represent each word as a 200-unit vector
        # so far: xx is (batch_size, #sentences, #words, embedding), qq is (batch_size, #words, embedding)
        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq
        # xx and qq keep the shapes described above
        cell = BasicLSTMCell(
            d, state_is_tuple=True)  # d = 100, hidden state number
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'),
                              2)  # [N, M], [60,?]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N] [60]
        # the masks are boolean; summing the casts yields the true lengths
        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(
                [fw_u, bw_u],
                2)  # (60, ?, 200): 200 because the two 100-unit hidden states are concatenated
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u  # [60, ?, 200] for question
            self.tensor_dict['h'] = h  # [60, ?, ?, 200] for article

        with tf.variable_scope("main"):
            if config.dynamic_att:  # TODO: document what dynamic attention does
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                cell2 = BasicLSTMCell(
                    d, state_is_tuple=True)  # d = 100, the hidden state size
                first_cell = SwitchableDropoutWrapper(
                    cell2,
                    self.is_train,
                    input_keep_prob=config.input_keep_prob)

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell,
                first_cell,
                inputs=p0,
                sequence_length=x_len,
                dtype='float',
                scope='g0')  # [N, M, JX, 2d]

            g0 = tf.concat([fw_g0, bw_g0], 3)
            cell3 = BasicLSTMCell(
                d, state_is_tuple=True)  # d = 100, the hidden state size
            first_cell3 = SwitchableDropoutWrapper(
                cell3, self.is_train, input_keep_prob=config.input_keep_prob)

            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell3, first_cell3, g0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat([fw_g1, bw_g1], 3)

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])
            cell4 = BasicLSTMCell(
                d, state_is_tuple=True)  # d = 100, the hidden state size
            first_cell4 = SwitchableDropoutWrapper(
                cell4, self.is_train, input_keep_prob=config.input_keep_prob)

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                first_cell4,
                first_cell4,
                tf.concat([p0, g1, a1i, g1 * a1i], 3),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat([fw_g2, bw_g2], 3)
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            yp = tf.reshape(flat_yp, [-1, M, JX])
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)
            yp2 = tf.reshape(flat_yp2, [-1, M, JX])

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
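softsel above is an external helper that pools g1 into a single attended vector per example using the span logits. A minimal NumPy sketch of that softmax-weighted pooling step (softsel_np and the toy shapes are illustrative, not this project's API):

import numpy as np

def softsel_np(target, logits):
    # target: [N, T, D], logits: [N, T] -> softmax-weighted sum over T, shape [N, D]
    e = np.exp(logits - logits.max(axis=1, keepdims=True))
    a = e / e.sum(axis=1, keepdims=True)
    return (a[..., None] * target).sum(axis=1)

# toy shapes mirroring g1 reshaped to [N, M*JX, 2d] and logits reshaped to [N, M*JX]
N, T, D = 2, 5, 4
g1_flat = np.random.randn(N, T, D).astype(np.float32)
logits_flat = np.random.randn(N, T).astype(np.float32)
a1i = softsel_np(g1_flat, logits_flat)  # [N, D]; the model then tiles this to [N, M, JX, D]
print(a1i.shape)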
Beispiel #18
0
    def decode(self, knowledge_rep, masks, is_train, hparams):
        """
        Takes in a knowledge representation and outputs, over all
        paragraph tokens, a probability estimate of which token should be
        the start of the answer span and which should be the end.

        :param knowledge_rep: a representation of the paragraph and question,
                              whose form depends on how you implement the encoder
        :return: ((yp1, flat_logits1), (yp2, flat_logits2)), the softmaxed start/end
                 distributions over paragraph tokens and their raw logits
        """
        p0 = knowledge_rep
        p_mask, q_mask = masks
        batch_size = hparams.batch_size
        input_keep_prob = hparams.input_keep_prob

        p_len = tf.reduce_sum(tf.cast(p_mask, 'int32'), 1)  # [N]
        q_len = tf.reduce_sum(tf.cast(q_mask, 'int32'), 1)  # [N]

        JX = tf.shape(p_mask)[1]

        with tf.variable_scope("main"):
            cell = BasicLSTMCell(self.state_size, state_is_tuple=True)
            first_cell = SwitchableDropoutWrapper(cell, is_train, input_keep_prob=input_keep_prob)

            # [N, JX, 2d]
            (fw_g0, bw_g0), _ = _bidirectional_dynamic_rnn(first_cell, first_cell, p0, 
                                                           p_len, dtype='float', scope='g0') 
            g0 = tf.concat([fw_g0, bw_g0], 2)
            
            cell = BasicLSTMCell(self.state_size, state_is_tuple=True)
            first_cell = SwitchableDropoutWrapper(cell, is_train, input_keep_prob=input_keep_prob)

            # [N, JX, 2d]
            (fw_g1, bw_g1), _ = _bidirectional_dynamic_rnn(first_cell, first_cell, g0, 
                                                           p_len, dtype='float', scope='g1')  
            g1 = tf.concat([fw_g1, bw_g1], 2)
            logits = linear_logits([g1, p0], self.state_size, 0.0, scope='logits1', 
                                   mask=p_mask, is_train=is_train)

            # TODO: use batch_size
            a1i = softsel(tf.reshape(g1, [batch_size, JX, 2 * self.state_size]), 
                          tf.reshape(logits, [batch_size, JX]))

            a1i = tf.tile(tf.expand_dims(a1i, 1), [1, JX, 1])
            
            flat_logits1 = tf.reshape(logits, [-1, JX])
            flat_yp = tf.nn.softmax(flat_logits1)  # [-1, JX]
            yp1 = tf.reshape(flat_yp, [-1, JX])

            cell = BasicLSTMCell(self.state_size, state_is_tuple=True)
            d_cell = SwitchableDropoutWrapper(cell, is_train, input_keep_prob=input_keep_prob)

            # [N, JX, 2d]
            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(d_cell, d_cell, 
                                                          tf.concat([p0, g1, a1i, g1 * a1i], 2),
                                                          p_len, dtype='float', scope='g2') 
            g2 = tf.concat([fw_g2, bw_g2], 2)
            logits2 = linear_logits([g2, p0], self.state_size, 0.0, scope='logits2', 
                                    mask=p_mask, is_train=is_train)
            flat_logits2 = tf.reshape(logits2, [-1, JX])
            flat_yp = tf.nn.softmax(flat_logits2)  # [-1, JX]
            yp2 = tf.reshape(flat_yp, [-1, JX])
        return (yp1, flat_logits1), (yp2, flat_logits2)
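The flattened start/end logits returned above lend themselves to a sparse cross-entropy objective; the sketch below is one plausible way to attach it (the answer_start and answer_end label tensors are assumptions about the surrounding training code, not part of this snippet):

import tensorflow as tf

def span_loss_sketch(flat_logits1, flat_logits2, answer_start, answer_end):
    # flat_logits1 / flat_logits2: [N, JX] start/end logits; answer_start / answer_end: [N] int32 indices
    start_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=answer_start, logits=flat_logits1)
    end_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=answer_end, logits=flat_logits2)
    return tf.reduce_mean(start_loss + end_loss)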
Beispiel #19
0
    def encode(self, inputs, masks, encoder_state_input, is_train, hparams):
        """
        In a generalized encode function, you pass in your inputs,
        masks, and an initial hidden state.

        :param inputs: Symbolic representations of your input
        :param masks: this is to make sure tf.nn.dynamic_rnn doesn't iterate
                      through masked steps
        :param encoder_state_input: (Optional) pass this as initial hidden state
                                    to tf.nn.dynamic_rnn to build conditional representations
        :return: an encoded representation of your input.
                 It can be context-level representation, word-level representation,
                 or both.
        """

        context_embed, question_embed = inputs
        p_mask, q_mask = masks
        batch_size = hparams.batch_size
        input_keep_prob = hparams.input_keep_prob

        cell = BasicLSTMCell(self.size, state_is_tuple=True)

        d_cell = SwitchableDropoutWrapper(cell, is_train, input_keep_prob=input_keep_prob)

        p_len = tf.reduce_sum(tf.cast(p_mask, 'int32'), 1)  # [N]
        q_len = tf.reduce_sum(tf.cast(q_mask, 'int32'), 1)  # [N]


        with tf.variable_scope('prepro'):
            
            # [N, J, d], [N, d]
            (fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(
                d_cell, d_cell, question_embed, q_len, dtype='float', scope='u1') 

            tf.get_variable_scope().reuse_variables()

            # [N, JX, 2d]
            (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, context_embed, 
                                                         p_len, dtype='float', scope='u1')  
            u = tf.concat([fw_u, bw_u], 2)
            h = tf.concat([fw_h, bw_h], 2)

        # Attention Layer
        with tf.variable_scope('attention_layer'):
            JQ = tf.shape(u)[1]
            JX = tf.shape(h)[1]

            h_aug = tf.tile(tf.expand_dims(h, 2), [1, 1, JQ, 1])
            u_aug = tf.tile(tf.expand_dims(u, 1), [1, JX, 1, 1])

            h_mask_aug = tf.tile(tf.expand_dims(p_mask, 2), [1, 1, JQ])
            u_mask_aug = tf.tile(tf.expand_dims(q_mask, 1), [1, JX, 1])
            hu_mask = tf.cast(h_mask_aug, tf.bool) & tf.cast(u_mask_aug, tf.bool)
            hu_aug = h_aug * u_aug
            u_logits = linear_logits([h_aug, u_aug, hu_aug], True, scope='u_logits', 
                                     mask=hu_mask, is_train=is_train)

            u_a = softsel(u_aug, u_logits)  # [N, JX, d]
            h_a = softsel(h, tf.reduce_max(u_logits, 2))  # [N, d]
            h_a = tf.tile(tf.expand_dims(h_a, 1), [1, JX, 1])

            p0 = tf.concat([h, u_a, h * u_a, h * h_a], 2)
        return p0
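The attention block above builds a context-to-query summary (u_a) and a query-to-context summary (h_a) from a similarity tensor before concatenating them into p0. A NumPy sketch of the same flow, with a plain dot product standing in for the learned linear_logits scorer and purely illustrative toy shapes:

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

N, JX, JQ, D = 2, 6, 4, 8
h = np.random.randn(N, JX, D).astype(np.float32)  # contextual encoding of the paragraph
u = np.random.randn(N, JQ, D).astype(np.float32)  # contextual encoding of the question

S = np.einsum('nxd,nqd->nxq', h, u)                      # similarity scores, [N, JX, JQ]

u_a = softmax(S, axis=2) @ u                             # context-to-query attention, [N, JX, D]
b = softmax(S.max(axis=2), axis=1)                       # query-to-context weights, [N, JX]
h_a = np.repeat(np.einsum('nx,nxd->nd', b, h)[:, None, :], JX, axis=1)  # tiled to [N, JX, D]

p0 = np.concatenate([h, u_a, h * u_a, h * h_a], axis=2)  # [N, JX, 4D]
print(p0.shape)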
Beispiel #20
0
inputs = tf.split(1, sequence_length, in_onehot)
inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
targets = tf.split(1, sequence_length, targ_ph)

# at this point, inputs is a list of length sequence_length
# each element of inputs is [batch_size,vocab_size]

# targets is a list of length sequence_length
# each element of targets is a 1D vector of length batch_size

# ------------------
# YOUR COMPUTATION GRAPH HERE

# create a BasicLSTMCell
cell = BasicLSTMCell(state_dim)
stacked_lstm = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers,
                                           state_is_tuple=True)

initial_state = stacked_lstm.zero_state(batch_size, tf.float32)

# call seq2seq.rnn_decoder
outputs, final_state = tf.nn.seq2seq.rnn_decoder(inputs, initial_state,
                                                 stacked_lstm)

# transform the list of state outputs to a list of logits.

W = tf.Variable(tf.truncated_normal([state_dim, vocab_size], stddev=0.1))

# use a linear transformation.
logits = [tf.matmul(i, W) for i in outputs]
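Continuing the graph above, the per-timestep logits could be trained with the legacy sequence loss from the same pre-1.0 seq2seq module; the uniform weights and the integer word-id targets are assumptions, not part of the original snippet:

# one all-ones weight vector per timestep, matching each [batch_size] target
loss_weights = [tf.ones([batch_size]) for _ in range(sequence_length)]
loss = tf.nn.seq2seq.sequence_loss_by_example(logits, targets, loss_weights)
cost = tf.reduce_sum(loss) / batch_size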
Beispiel #21
0
 def __init__(self, num_units, forget_bias=1.0, input_size=None):
     BasicLSTMCell.__init__(self,
                            num_units,
                            forget_bias=forget_bias,
                            input_size=input_size)
     self.matrix, self.bias = None, None
Beispiel #22
0
    def _build_forward(self):
        # config holds the pre-set hyperparameters and options
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        # embedding layer
        with tf.variable_scope("emb"):
            # character embedding layer
            if config.use_char_emb:  # only when character embeddings are enabled
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    # filter parameters for the character CNN
                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)

                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            # word embedding layer
            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:  # append the pre-trained GloVe embedding matrix
                        word_emb_mat = tf.concat(
                            0, [word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    # map the context x and the query q to word vectors
                    # embedding_lookup(params, ids) selects the rows of params indexed by ids
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:  # if character embeddings were built, concatenate them with the word embeddings along the feature axis
                    xx = tf.concat(3, [xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(2, [qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # two highway-network layers yield the context matrix X ∈ R^(d×T) and the query matrix Q ∈ R^(d×J)
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell = BasicLSTMCell(d, state_is_tuple=True)
        # SwitchableDropoutWrapper is a custom DropoutWrapper variant
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        # cast maps the boolean masks to int32; reduce_sum then counts the non-padding positions of x and q
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        # Contextual Embedding Layer: run a BiLSTM over X and Q to capture the local interactions among the words of each
        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            # fw_u and bw_u are the outputs of the forward and backward LSTMs
            u = tf.concat(2, [fw_u, bw_u])  # [N, J, 2d]
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        # core layer: Attention Flow Layer
        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                # expand_dims() inserts a new axis at the given position
                # tile() replicates the tensor along the given axes
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [
                    N * M, JQ, 2 * d
                ])  # add an axis at index 1, then replicate it M times (the max number of sentences per context) so u matches the shape of h
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell = d_cell

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(3, [fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, g0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(3, [fw_g1, bw_g1])

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell,
                d_cell,
                tf.concat(3, [p0, g1, a1i, g1 * a1i]),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(3, [fw_g2, bw_g2])
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            yp = tf.reshape(flat_yp, [-1, M, JX])
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)
            yp2 = tf.reshape(flat_yp2, [-1, M, JX])

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
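highway_network above is an external helper; the sketch below shows a single highway layer in the same TF 1.x style (the variable names are illustrative, and it assumes a 2-D [batch, size] input, whereas the real helper also handles the extra sentence/word axes):

import tensorflow as tf

def highway_layer_sketch(x, size, scope='highway_layer'):
    # One highway layer: a gated mix of a nonlinear transform and the identity.
    with tf.variable_scope(scope):
        W_h = tf.get_variable('W_h', [size, size])
        b_h = tf.get_variable('b_h', [size], initializer=tf.zeros_initializer())
        W_t = tf.get_variable('W_t', [size, size])
        b_t = tf.get_variable('b_t', [size], initializer=tf.constant_initializer(-1.0))
        h = tf.nn.relu(tf.matmul(x, W_h) + b_h)   # candidate transform
        t = tf.sigmoid(tf.matmul(x, W_t) + b_t)   # transform gate, biased toward carrying the input
        return t * h + (1.0 - t) * x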
Beispiel #23
0
 def __init__(self, num_units, cell_type='lstm', scope=None):
     self.cell_fw = GRUCell(
         num_units) if cell_type == 'gru' else BasicLSTMCell(num_units)
     self.cell_bw = GRUCell(
         num_units) if cell_type == 'gru' else BasicLSTMCell(num_units)
     self.scope = scope or "bi_rnn"
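A sketch of how such a forward/backward cell pair is typically driven with TF 1.x's tf.nn.bidirectional_dynamic_rnn (the placeholder shapes and the import path are assumptions, not taken from the surrounding code):

import tensorflow as tf
from tensorflow.contrib.rnn import BasicLSTMCell

inputs = tf.placeholder(tf.float32, [None, None, 50])  # [batch, time, features]
seq_len = tf.placeholder(tf.int32, [None])              # true length of each sequence

cell_fw = BasicLSTMCell(64)
cell_bw = BasicLSTMCell(64)
(out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs, sequence_length=seq_len, dtype=tf.float32, scope='bi_rnn')
outputs = tf.concat([out_fw, out_bw], 2)                 # [batch, time, 2 * 64]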
Beispiel #24
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size
        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            0, [word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(3, [xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(2, [qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell = BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(2, [fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell = d_cell
            self.p = p0
            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(3, [fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, g0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(3, [fw_g1, bw_g1])

        with tf.variable_scope("output"):
            if config.model_name == "basic":
                logits = get_logits([g1, p0], d, True, wd=config.wd, \
                        input_keep_prob=config.input_keep_prob,
                        mask=self.x_mask, is_train=self.is_train, \
                        func=config.answer_func, scope='logits1')
                a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]), \
                        tf.reshape(logits, [N, M * JX]))
                a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1), \
                        [1, M, JX, 1])
                (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(d_cell, d_cell, \
                        tf.concat(3, [p0, g1, a1i, g1 * a1i]),
                        x_len, dtype='float', scope='g2')  # [N, M, JX, 2d]
                g2 = tf.concat(3, [fw_g2, bw_g2])
                logits2 = get_logits([g2, p0], d, True, wd=config.wd, \
                        input_keep_prob=config.input_keep_prob, mask=self.x_mask,
                        is_train=self.is_train, func=config.answer_func,
                        scope='logits2')
                flat_logits = tf.reshape(logits, [-1, M * JX])
                flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
                yp = tf.reshape(flat_yp, [-1, M, JX])
                flat_logits2 = tf.reshape(logits2, [-1, M * JX])
                flat_yp2 = tf.nn.softmax(flat_logits2)
                yp2 = tf.reshape(flat_yp2, [-1, M, JX])

                self.tensor_dict['g1'] = g1
                self.tensor_dict['g2'] = g2

                self.logits = flat_logits
                self.logits2 = flat_logits2
                self.yp = yp
                self.yp2 = yp2

            elif config.model_name == "basic-class":
                C = 3 if config.data_dir.startswith('data/snli') else 2
                (fw_g2, bw_g2) = (fw_g1, bw_g1)

                if config.classifier == 'maxpool':
                    g2 = tf.concat(3, [fw_g2, bw_g2])  # [N, M, JX, 2d]
                    g2 = tf.reduce_max(g2, 2)  # [N, M, 2d]
                    g2_dim = 2 * d
                elif config.classifier == 'sumpool':
                    g2 = tf.concat(3, [fw_g2, bw_g2])
                    g2 = tf.reduce_sum(g2, 2)
                    g2_dim = 2 * d
                else:
                    fw_g2_ = tf.gather(tf.transpose(fw_g2, [2, 0, 1, 3]),
                                       JX - 1)
                    bw_g2_ = tf.gather(tf.transpose(bw_g2, [2, 0, 1, 3]), 0)
                    g2 = tf.concat(2, [fw_g2_, bw_g2_])
                    g2_dim = 2 * d

                g2_ = tf.reshape(g2, [N, g2_dim])

                logits0 = linear(g2_,
                                 C,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 is_train=self.is_train,
                                 scope='classifier')
                flat_yp0 = tf.nn.softmax(logits0)
                yp0 = tf.reshape(flat_yp0, [N, M, C])
                self.tensor_dict['g1'] = g1
                self.logits0 = logits0
                self.yp0 = yp0
                self.logits = logits0
                self.yp = yp0
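In the 'basic-class' branch, the maxpool classifier collapses the token axis before a linear layer and a softmax; a NumPy sketch of that reduction (toy shapes, with M = 1 as the reshape to [N, g2_dim] implicitly requires):

import numpy as np

N, M, JX, D, C = 2, 1, 7, 8, 3
g2 = np.random.randn(N, M, JX, D).astype(np.float32)  # concatenated fw/bw outputs

pooled = g2.max(axis=2)                                # max over tokens -> [N, M, D]
flat = pooled.reshape(N, D)                            # valid because M == 1 here

W = 0.01 * np.random.randn(D, C).astype(np.float32)   # stand-in for the learned linear layer
b = np.zeros(C, dtype=np.float32)
logits0 = flat @ W + b                                 # [N, C]

e = np.exp(logits0 - logits0.max(axis=1, keepdims=True))
yp0 = e / e.sum(axis=1, keepdims=True)                 # class probabilities
print(yp0.shape)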
Beispiel #25
0
def construct_model(images,
                    actions=None,
                    states=None,
                    iter_num=-1.0,
                    k=-1,
                    use_state=True,
                    num_masks=10,
                    stp=False,
                    cdna=True,
                    dna=False,
                    context_frames=2,
                    pix_distributions=None,
                    conf=None):
    """Build convolutional lstm video predictor using STP, CDNA, or DNA.

    Args:
      images: tensor of ground truth image sequences
      actions: tensor of action sequences
      states: tensor of ground truth state sequences
      iter_num: tensor of the current training iteration (for sched. sampling)
      k: constant used for scheduled sampling. -1 to feed in own prediction.
      use_state: True to include state and action in prediction
      num_masks: the number of different pixel motion predictions (and
                 the number of masks for each of those predictions)
      stp: True to use Spatial Transformer Predictor (STP)
      cdna: True to use Convolutional Dynamic Neural Advection (CDNA)
      dna: True to use Dynamic Neural Advection (DNA)
      context_frames: number of ground truth frames to pass in before
                      feeding in own predictions
      pix_distributions: the initial one-hot distribution for designated pixels
    Returns:
      gen_images: predicted future image frames
      gen_states: predicted future states

    Raises:
      ValueError: if more than one network option specified or more than 1 mask
      specified for DNA model.
    """

    if 'dna_size' in conf.keys():
        DNA_KERN_SIZE = conf['dna_size']
    else:
        DNA_KERN_SIZE = 5

    print 'constructing network with less layers...'

    if stp + cdna + dna != 1:
        raise ValueError('More than one, or no network option specified.')
    batch_size, img_height, img_width, color_channels = images[0].get_shape(
    )[0:4]
    batch_size = int(batch_size)
    lstm_func = basic_conv_lstm_cell

    # Generated robot states and images.
    gen_states, gen_images, gen_masks, inf_low_state, pred_low_state = [], [], [], [], []
    current_state = states[0]
    gen_pix_distrib = []

    summaries = []

    if k == -1:
        feedself = True
    else:
        # Scheduled sampling:
        # Calculate number of ground-truth frames to pass in.
        num_ground_truth = tf.to_int32(
            tf.round(
                tf.to_float(batch_size) * (k / (k + tf.exp(iter_num / k)))))
        feedself = False

    # LSTM state sizes and states.
    lstm_size = np.int32(np.array([16, 32, 64, 100, 10]))
    lstm_state1, lstm_state2, lstm_state3 = None, None, None

    single_lstm1 = BasicLSTMCell(lstm_size[3], state_is_tuple=True)
    single_lstm2 = BasicLSTMCell(lstm_size[4], state_is_tuple=True)
    low_dim_lstm = MultiRNNCell([single_lstm1, single_lstm2],
                                state_is_tuple=True)

    low_dim_lstm_state = low_dim_lstm.zero_state(batch_size, tf.float32)

    dim_low_state = int(lstm_size[-1])

    t = -1
    for image, action in zip(images[:-1], actions[:-1]):
        t += 1
        print 'building timestep ', t
        # Reuse variables after the first timestep.
        reuse = bool(gen_images)

        done_warm_start = len(gen_images) > context_frames - 1
        with slim.arg_scope([
                lstm_func, slim.layers.conv2d, slim.layers.fully_connected,
                tf_layers.layer_norm, slim.layers.conv2d_transpose
        ],
                            reuse=reuse):

            if feedself and done_warm_start:
                # Feed in generated image.
                prev_image = gen_images[-1]
                if pix_distributions != None:
                    prev_pix_distrib = gen_pix_distrib[-1]
            elif done_warm_start:
                # Scheduled sampling
                prev_image = scheduled_sample(image, gen_images[-1],
                                              batch_size, num_ground_truth)
            else:
                # Always feed in ground_truth
                prev_image = image
                if pix_distributions != None:
                    prev_pix_distrib = pix_distributions[t]
                    prev_pix_distrib = tf.expand_dims(prev_pix_distrib, -1)

            # Predicted state is always fed back in
            state_action = tf.concat(1, [action, current_state])  # 6x

            enc0 = slim.layers.conv2d(  #32x32x32
                prev_image,
                32,
                kernel_size=[5, 5],
                stride=2,
                scope='scale1_conv1',
                normalizer_fn=tf_layers.layer_norm,
                normalizer_params={'scope': 'layer_norm1'})

            hidden1, lstm_state1 = lstm_func(  #32x32
                enc0, lstm_state1, lstm_size[0], scope='state1')
            hidden1 = tf_layers.layer_norm(hidden1, scope='layer_norm2')

            enc1 = slim.layers.conv2d(  #16x16
                hidden1,
                hidden1.get_shape()[3], [3, 3],
                stride=2,
                scope='conv2')

            hidden2, lstm_state2 = lstm_func(  #16x16x32
                enc1, lstm_state2, lstm_size[1], scope='state3')
            hidden2 = tf_layers.layer_norm(hidden2, scope='layer_norm4')

            enc2 = slim.layers.conv2d(  #8x8x32
                hidden2,
                hidden2.get_shape()[3], [3, 3],
                stride=2,
                scope='conv3')

            # Pass in state and action.
            smear = tf.reshape(
                state_action,
                [batch_size, 1, 1,
                 int(state_action.get_shape()[1])])
            smear = tf.tile(  #8x8x6
                smear,
                [1, int(enc2.get_shape()[1]),
                 int(enc2.get_shape()[2]), 1])
            if use_state:
                enc2 = tf.concat(3, [enc2, smear])
            enc3 = slim.layers.conv2d(  #8x8x32
                enc2,
                hidden2.get_shape()[3], [1, 1],
                stride=1,
                scope='conv4')

            hidden3, lstm_state3 = lstm_func(  #8x8x64
                enc3, lstm_state3, lstm_size[2], scope='state5')  # last 8x8
            hidden3 = tf_layers.layer_norm(hidden3, scope='layer_norm6')

            enc3 = slim.layers.conv2d(  # 8x8x32
                hidden3, 16, [1, 1], stride=1, scope='conv5')

            enc3_flat = tf.reshape(enc3, [batch_size, -1])

            if 'use_low_dim_lstm' in conf:
                with tf.variable_scope('low_dim_lstm', reuse=reuse):
                    hidden4, low_dim_lstm_state = low_dim_lstm(
                        enc3_flat, low_dim_lstm_state)
                low_dim_state = hidden4
            else:
                enc_fully1 = slim.layers.fully_connected(enc3_flat,
                                                         400,
                                                         scope='enc_fully1')

                enc_fully2 = slim.layers.fully_connected(enc_fully1,
                                                         100,
                                                         scope='enc_fully2')

                low_dim_state = enc_fully2

            # inferred low dimensional state:
            inf_low_state.append(low_dim_state)

            pred_low_state.append(project_fwd_lowdim(low_dim_state))

            smear = tf.reshape(low_dim_state,
                               [batch_size, 1, 1, dim_low_state])
            smear = tf.tile(  # 8x8xdim_hidden_state
                smear,
                [1, int(enc2.get_shape()[1]),
                 int(enc2.get_shape()[2]), 1])

            enc4 = slim.layers.conv2d_transpose(  #16x16x32
                smear,
                hidden3.get_shape()[3],
                3,
                stride=2,
                scope='convt1')

            enc5 = slim.layers.conv2d_transpose(  #32x32x32
                enc4,
                enc0.get_shape()[3],
                3,
                stride=2,
                scope='convt2')

            enc6 = slim.layers.conv2d_transpose(  #64x64x16
                enc5,
                16,
                3,
                stride=2,
                scope='convt3',
                normalizer_fn=tf_layers.layer_norm,
                normalizer_params={'scope': 'layer_norm9'})

            # Using largest hidden state for predicting untied conv kernels.
            enc7 = slim.layers.conv2d_transpose(enc6,
                                                DNA_KERN_SIZE**2,
                                                1,
                                                stride=1,
                                                scope='convt4')

            # Only one mask is supported (more should be unnecessary).
            if num_masks != 1:
                raise ValueError('Only one mask is supported for DNA model.')
            transformed = [dna_transformation(prev_image, enc7, DNA_KERN_SIZE)]

            if 'use_masks' in conf:
                masks = slim.layers.conv2d_transpose(enc6,
                                                     num_masks + 1,
                                                     1,
                                                     stride=1,
                                                     scope='convt7')
                masks = tf.reshape(
                    tf.nn.softmax(tf.reshape(masks, [-1, num_masks + 1])), [
                        int(batch_size),
                        int(img_height),
                        int(img_width), num_masks + 1
                    ])
                mask_list = tf.split(3, num_masks + 1, masks)
                output = mask_list[0] * prev_image
                for layer, mask in zip(transformed, mask_list[1:]):
                    output += layer * mask
            else:
                mask_list = None
                output = transformed

            gen_images.append(output)
            gen_masks.append(mask_list)

            if dna and pix_distributions != None:
                transf_distrib = [
                    dna_transformation(prev_pix_distrib, enc7, DNA_KERN_SIZE)
                ]

            if pix_distributions != None:
                pix_distrib_output = mask_list[0] * prev_pix_distrib
                mult_list = []
                for i in range(num_masks):
                    mult_list.append(transf_distrib[i] * mask_list[i + 1])
                    pix_distrib_output += mult_list[i]

                gen_pix_distrib.append(pix_distrib_output)

            # pred_low_state_stopped = tf.stop_gradient(pred_low_state)

            state_enc1 = slim.layers.fully_connected(
                # pred_low_state[-1],
                low_dim_state,
                100,
                scope='state_enc1')

            state_enc2 = slim.layers.fully_connected(
                state_enc1,
                # int(current_state.get_shape()[1]),
                4,
                scope='state_enc2',
                activation_fn=None)
            current_state = tf.squeeze(state_enc2)
            gen_states.append(current_state)

    if pix_distributions != None:
        return gen_images, gen_states, gen_masks, gen_pix_distrib, inf_low_state, pred_low_state
    else:
        return gen_images, gen_states, gen_masks, None, inf_low_state, pred_low_state
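scheduled_sample is called above but not defined in this snippet; the sketch below shows one common way such a helper mixes the batch (an assumption about its behaviour, not necessarily this repository's version): num_ground_truth rows come from the real frame and the rest from the previous prediction.

import tensorflow as tf

def scheduled_sample_sketch(ground_truth_x, generated_x, batch_size, num_ground_truth):
    # Randomly decide which batch rows come from ground truth vs. the model's own output.
    idx = tf.random_shuffle(tf.range(int(batch_size)))
    ground_truth_idx = tf.gather(idx, tf.range(num_ground_truth))
    generated_idx = tf.gather(idx, tf.range(num_ground_truth, int(batch_size)))

    ground_truth_examps = tf.gather(ground_truth_x, ground_truth_idx)
    generated_examps = tf.gather(generated_x, generated_idx)
    # Stitch the two subsets back into their original batch positions.
    return tf.dynamic_stitch([ground_truth_idx, generated_idx],
                             [ground_truth_examps, generated_examps])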
Beispiel #26
0
    def __init__(self, model_parameters, training_parameters, directories,
                 **kwargs):
        """ Initialization of the RNN Model as TensorFlow computational graph
    """

        self.model_parameters = model_parameters
        self.training_parameters = training_parameters
        self.directories = directories

        # Define model hyperparameters Tensors
        with tf.name_scope("Parameters"):
            self.learning_rate = tf.placeholder(tf.float32,
                                                name="learning_rate")
            self.momentum = tf.placeholder(tf.float32, name="momentum")
            self.input_keep_probability = tf.placeholder(
                tf.float32, name="input_keep_probability")
            self.output_keep_probability = tf.placeholder(
                tf.float32, name="output_keep_probability")

            self.is_training = tf.placeholder(tf.bool)

        # Define input, output and initialization Tensors
        with tf.name_scope("Input"):
            self.inputs = tf.placeholder("float", [
                None, self.model_parameters.sequence_length,
                self.model_parameters.input_dimension
            ],
                                         name='input_placeholder')

            self.targets = tf.placeholder(
                "float", [None, self.model_parameters.sequence_length, 1],
                name='labels_placeholder')

            self.init = tf.placeholder(
                tf.float32,
                shape=[None, self.model_parameters.state_size],
                name="init")

        # Define the TensorFlow RNN computational graph
        with tf.name_scope("LSTMRNN_RNN"):
            cells = []

            # Define the layers
            for _ in range(self.model_parameters.n_layers):
                if self.model_parameters.model == 'rnn':
                    cell = BasicRNNCell(self.model_parameters.state_size)
                elif self.model_parameters.model == 'gru':
                    cell = GRUCell(self.model_parameters.state_size)
                elif self.model_parameters.model == 'lstm':
                    cell = BasicLSTMCell(self.model_parameters.state_size,
                                         state_is_tuple=True)
                elif self.model_parameters.model == 'nas':
                    cell = NASCell(self.model_parameters.state_size)
                else:
                    raise Exception("model type not supported: {}".format(
                        self.model_parameters.model))

                if (self.model_parameters.output_keep_probability < 1.0
                        or self.model_parameters.input_keep_probability < 1.0):

                    if self.model_parameters.output_keep_probability < 1.0:
                        cell = DropoutWrapper(
                            cell,
                            output_keep_prob=self.output_keep_probability)

                    if self.model_parameters.input_keep_probability < 1.0:
                        cell = DropoutWrapper(
                            cell, input_keep_prob=self.input_keep_probability)

                cells.append(cell)
            cell = MultiRNNCell(cells)

            # Simulate time steps and get RNN cell output
            self.outputs, self.next_state = tf.nn.dynamic_rnn(cell,
                                                              self.inputs,
                                                              dtype=tf.float32)

        # Define cost Tensors
        with tf.name_scope("LSTMRNN_Cost"):

            # Flatten to apply same weights to all time steps
            self.flattened_outputs = tf.reshape(
                self.outputs, [-1, self.model_parameters.state_size],
                name="flattened_outputs")

            self.output_w = tf.Variable(tf.truncated_normal(
                [self.model_parameters.state_size, 1], stddev=0.01),
                                        name="output_weights")

            self.variable_summaries(self.output_w, 'output_weights')

            self.output_b = tf.Variable(tf.constant(0.1), name="output_biases")

            self.variable_summaries(self.output_b, 'output_biases')

            # Define decision threshold Tensor
            self.decision_threshold = tf.Variable(
                self.model_parameters.threshold, name="decision_threshold")

            # Define moving average step Tensor
            self.ma_step = tf.Variable(self.model_parameters.ma_step,
                                       name="ma_step")

            # Softmax activation layer, using RNN inner loop last output
            # logits and labels must have the same shape [batch_size, num_classes]

            self.logits = tf.add(tf.matmul(self.flattened_outputs,
                                           self.output_w),
                                 self.output_b,
                                 name="logits")

            self.logits_bn = self.batch_norm_wrapper(
                inputs=self.logits, is_training=self.is_training)

            tf.summary.histogram('logits', self.logits)
            tf.summary.histogram('logits_bn', self.logits_bn)

            self.predictions = tf.reshape(
                self.logits, [-1, self.model_parameters.sequence_length, 1],
                name="predictions")

            self.shaped_predictions = tf.reshape(self.predictions, [-1],
                                                 name="shaped_predictions")

            self.tmp_smoothed_predictions = tf.concat(
                [
                    self.shaped_predictions,
                    tf.fill(
                        tf.expand_dims(self.ma_step - 1, 0),
                        self.shaped_predictions[
                            tf.shape(self.shaped_predictions)[0] - 1])
                ],
                axis=0,
                name="tmp_smoothed_predictions")

            self.ma_loop_idx = tf.constant(0, dtype='int32')
            self.shaped_smoothed_predictions = tf.zeros([0], dtype='float32')

            _, self.shaped_smoothed_predictions = tf.while_loop(
                lambda i, _: i < tf.shape(self.shaped_predictions)[0],
                self.ma_while_body,
                [self.ma_loop_idx, self.shaped_smoothed_predictions],
                shape_invariants=[tf.TensorShape([]),
                                  tf.TensorShape([None])])

            self.smoothed_predictions = tf.reshape(
                self.shaped_smoothed_predictions,
                [-1, self.model_parameters.sequence_length, 1],
                name="smoothed_predictions")

            self.soft_predictions_summary = tf.summary.tensor_summary(
                "soft_predictions", self.smoothed_predictions)
            # self.soft_predictions_summary = tf.summary.tensor_summary("soft_predictions", self.predictions)

            # self.shaped_logits = tf.reshape(self.logits,
            #   [-1, self.model_parameters.sequence_length, 1],
            #   name="shaped_logits")

            # Cross-Entropy
            # self.cost = tf.reduce_mean(-tf.reduce_sum(
            #   self.targets * tf.log(self.predictions),
            #   reduction_indices=[2]), name="cross_entropy")

            # self.cross_entropy = tf.reduce_mean(
            #   tf.nn.sigmoid_cross_entropy_with_logits(_sentinel=None,
            #     labels=self.targets,
            #     logits=self.predictions),
            #   name="cross_entropy")

            # self.cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            #   _sentinel=None,
            #   labels=self.targets,
            #   logits=self.shaped_logits,
            #   name="cross_entropy")

            # Root Mean Squared Error
            # self.mean_squared_error = tf.losses.mean_squared_error(
            #   labels=self.targets,
            #   predictions=self.predictions)

            self.cost = tf.sqrt(
                tf.reduce_mean(
                    tf.squared_difference(self.smoothed_predictions,
                                          self.targets)))

            # self.cost = tf.sqrt(tf.reduce_mean(
            #   tf.squared_difference(
            #     self.predictions, self.targets)))

            tf.summary.scalar('training_cost', self.cost)

            # self.cost = tf.reduce_mean(
            #   self.cross_entropy,
            #   name="cost")

            voicing_condition = tf.greater(
                self.smoothed_predictions,
                tf.fill(tf.shape(self.smoothed_predictions),
                        self.decision_threshold),
                name="thresholding")

            # voicing_condition = tf.greater(self.predictions,
            #   tf.fill(tf.shape(self.predictions), self.decision_threshold),
            #   name="thresholding")

            self.label_predictions = tf.where(
                voicing_condition,
                tf.ones_like(self.smoothed_predictions),
                tf.zeros_like(self.smoothed_predictions),
                name="label_predictions")

            # self.label_predictions = tf.where(voicing_condition,
            #   tf.ones_like(self.predictions) ,
            #   tf.zeros_like(self.predictions),
            #   name="label_predictions")

            self.hard_predictions_summary = tf.summary.tensor_summary(
                "hard_predictions", self.label_predictions)

            self.correct_prediction = tf.equal(self.label_predictions,
                                               self.targets,
                                               name="correct_predictions")

            self.r = tf.reshape(self.targets, [-1])
            self.h = tf.reshape(self.label_predictions, [-1])

            # Defined outside the while loop to avoid problems
            self.dump_one = tf.constant(1, dtype=tf.int32, shape=[])

            self.temp_pk_miss = tf.Variable([0], tf.int32, name='temp_pk_miss')
            self.temp_pk_falsealarm = tf.Variable([0],
                                                  tf.int32,
                                                  name='temp_pk_falsealarm')
            self.loop_idx = tf.constant(0, dtype=tf.int32, name='loop_idx')

            self.loop_vars = self.loop_idx, self.temp_pk_miss, self.temp_pk_falsealarm

            _, self.all_temp_pk_miss, self.all_temp_pk_falsealarm = tf.while_loop(
                self.while_condition,
                self.while_body,
                self.loop_vars,
                shape_invariants=(self.loop_idx.get_shape(),
                                  tf.TensorShape([None]),
                                  tf.TensorShape([None])))

            self.pk_miss = tf.reduce_mean(
                tf.cast(self.all_temp_pk_miss, tf.float32))
            tf.summary.scalar('p_miss', self.pk_miss)

            self.pk_falsealarm = tf.reduce_mean(
                tf.cast(self.all_temp_pk_falsealarm, tf.float32))
            tf.summary.scalar('p_falsealarm', self.pk_falsealarm)

            self.pk = tf.reduce_mean(tf.cast(
                tf.add(self.all_temp_pk_miss, self.all_temp_pk_falsealarm),
                tf.float32),
                                     name='pk')

            tf.summary.scalar('pk', self.pk)

            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction,
                                                   tf.float32),
                                           name="accuracy")

            tf.summary.scalar('accuracy', self.accuracy)

            self.recall, self.update_op_recall = tf.metrics.recall(
                labels=self.targets,
                predictions=self.label_predictions,
                name="recall")

            tf.summary.scalar('recall', self.recall)

            self.precision, self.update_op_precision = tf.metrics.precision(
                labels=self.targets,
                predictions=self.label_predictions,
                name="precision")

            tf.summary.scalar('precision', self.precision)

        # Define Training Tensors
        with tf.name_scope("LSTMRNN_Train"):

            # Momentum optimisation
            self.optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate,
                momentum=self.momentum,
                name="optimizer")

            self.train_step = self.optimizer.minimize(self.cost,
                                                      name="train_step")

            # Initializing the variables
            self.initializer = tf.group(tf.global_variables_initializer(),
                                        tf.local_variables_initializer())
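
            # A hypothetical training-loop sketch (not from the original code);
            # `feed_dict_fn` and `num_steps` are assumed names for illustration.
            #
            # merged_summaries = tf.summary.merge_all()
            # with tf.Session() as sess:
            #     sess.run(self.initializer)
            #     for step in range(num_steps):
            #         _, cost, summary = sess.run(
            #             [self.train_step, self.cost, merged_summaries],
            #             feed_dict=feed_dict_fn(step))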
Beispiel #27
0
            name="b_h",
            shape=[self._num_units],
            initializer=tf.contrib.layers.variance_scaling_initializer())

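        # GRU gates: r_t (reset) and z_t (update) are sigmoid activations of the
        # current input and previous state; h_hat_t is the candidate state.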
        r_t = tf.sigmoid(tf.matmul(inputs, W_xr) + tf.matmul(state, W_hr) + b_r)
        z_t = tf.sigmoid(tf.matmul(inputs, W_xz) + tf.matmul(state, W_hz) + b_z)
        h_hat_t = tf.tanh(
            tf.matmul(inputs, W_xh) + tf.matmul(r_t * state, W_hh) + b_h)

        h_t = z_t * state + (1 - z_t) * h_hat_t

        return h_t, h_t


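# Stack num_layers LSTM cells; the commented line below swaps in the custom GRU (mygru).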
multi_cell = MultiRNNCell(
    [BasicLSTMCell(state_dim) for i in range(num_layers)])
#multi_cell = MultiRNNCell([mygru(state_dim) for i in range(num_layers)])
initial_state = multi_cell.zero_state(batch_size, dtype=tf.float32)

# call seq2seq.rnn_decoder
outputs, final_state = rnn_decoder(inputs, initial_state, multi_cell)

# transform the list of state outputs to a list of logits.
# use a linear transformation.
weights = tf.get_variable(
    name="W",
    shape=[state_dim, vocab_size],
    initializer=tf.contrib.layers.variance_scaling_initializer())
bias = tf.get_variable(
    name="b",
    shape=[vocab_size],
    initializer=tf.constant_initializer(0.0))  # assumed zero bias init
Beispiel #28
0
    def __init__(self, num_units, forget_bias=1.0, input_size=None):
        BasicLSTMCell.__init__(self, num_units, forget_bias=forget_bias, input_size=input_size)
        self.matrix, self.bias = None, None
Beispiel #29
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            print('word embedding')
            # if config.use_char_emb:
            #     with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
            #         char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

            #     with tf.variable_scope("char"):
            #         Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, M, JX, W, dc]
            #         Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq)  # [N, JQ, W, dc]
            #         Acx = tf.reshape(Acx, [-1, JX, W, dc])
            #         Acq = tf.reshape(Acq, [-1, JQ, W, dc])

            #         filter_sizes = list(map(int, config.out_channel_dims.split(',')))
            #         heights = list(map(int, config.filter_heights.split(',')))
            #         assert sum(filter_sizes) == dco, (filter_sizes, dco)
            #         with tf.variable_scope("conv"):
            #             xx = multi_conv1d(Acx, filter_sizes, heights, "VALID",  self.is_train, config.keep_prob, scope="xx")
            #             if config.share_cnn_weights:
            #                 tf.get_variable_scope().reuse_variables()
            #                 qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
            #             else:
            #                 qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="qq")
            #             xx = tf.reshape(xx, [-1, M, JX, dco])
            #             qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/gpu:7"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                    # if config.use_glove_for_unk:
                    #     word_emb_mat = tf.concat(0, [word_emb_mat])
                    print(word_emb_mat)

                with tf.name_scope("word"):
                    print('embedding lookup')
                    Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                    print('embedding lookup ready')
                # if config.use_char_emb:
                #     xx = tf.concat(3, [xx, Ax])  # [N, M, JX, di]
                #     qq = tf.concat(2, [qq, Aq])  # [N, JQ, di]
                # else:
                xx = Ax
                qq = Aq

        # highway network
        #if config.highway:
        #    with tf.variable_scope("highway"):
        #        xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
        #        tf.get_variable_scope().reuse_variables()
        #        qq = highway_network(qq, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq
        
        print('context embedding')
        cell = BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob)
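        # d_cell wraps the LSTM with input dropout that is switched on/off by the is_train flag.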
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]
        print('prepro')
        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(d_cell, d_cell, qq, q_len, dtype='float32', scope='u1')  # [N, J, d], [N, d]
            u = tf.concat([fw_u, bw_u], 2)
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='u1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='h1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h 
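            # u encodes the question, h encodes the context passages (2*d features from the biLSTM).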
        print('main pro')
        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [N * M, JQ, 2 * d])
                q_mask = tf.reshape(tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]), [N * M, JQ])
                first_cell = AttentionCell(cell, u, mask=q_mask, mapper='sim',
                                           input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
            else:
                p0 = attention_layer(config, self.is_train, h, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p0", tensor_dict=self.tensor_dict)
                first_cell = d_cell

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(first_cell, first_cell, p0, x_len, dtype='float', scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat([fw_g0, bw_g0], 3)
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(first_cell, first_cell, g0, x_len, dtype='float', scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat([fw_g1, bw_g1], 3)

            logits = get_logits([g1, p0], [config.batch_size, config.max_num_sents], True, wd=config.wd, input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask, is_train=self.is_train, func='sigmoid', scope='logits1')
            # a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]), tf.reshape(logits, [N, M * JX]))
            # a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1), [1, M, JX, 1])

            # (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(d_cell, d_cell, tf.concat(3, [p0, g1, a1i, g1 * a1i]),
            #                                               x_len, dtype='float', scope='g2')  # [N, M, JX, 2d]
            # g2 = tf.concat(3, [fw_g2, bw_g2])
            # logits2 = get_logits([g2, p0], d, True, wd=config.wd, input_keep_prob=config.input_keep_prob,
            #                      mask=self.x_mask,
            #                      is_train=self.is_train, func=config.answer_func, scope='logits2')

            # flat_logits = tf.reshape(logits, [-1, M * JX])
            # flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            # yp = tf.reshape(flat_yp, [-1, M, JX])
            yp = tf.greater(logits, 0.5)
            # flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            # flat_yp2 = tf.nn.softmax(flat_logits2)
            # yp2 = tf.reshape(flat_yp2, [-1, M, JX])

            self.tensor_dict['g1'] = g1
            # self.tensor_dict['g2'] = g2

            self.logits = logits
            # self.logits2 = flat_logits2
            self.yp = yp
Beispiel #30
0
    def build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        word_emb = tf.get_variable(
            "word_emb_mat",
            shape=[config.word_vocab_size, config.word_emb_size],
            dtype='float',
            initializer=self.emb_mat)
        with tf.variable_scope("embedding"):

            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                char_emb_mat = tf.get_variable("char_emb_mat",
                                               shape=[VC, dc],
                                               dtype='float')

            with tf.variable_scope("char"), tf.device("/cpu:0"):
                Acx = tf.nn.embedding_lookup(char_emb_mat,
                                             self.cx)  # [N, M, JX, W, dc]
                Acq = tf.nn.embedding_lookup(char_emb_mat,
                                             self.cq)  # [N, JQ, W, dc]
                Acx = tf.reshape(Acx, [-1, JX, W, dc])
                Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                filter_sizes = list(
                    map(int, config.out_channel_dims.split(',')))
                heights = list(map(int, config.filter_heights.split(',')))
                assert sum(filter_sizes) == dco, (filter_sizes, dco)
                with tf.variable_scope("conv"):
                    xx = multi_conv1d(Acx,
                                      filter_sizes,
                                      heights,
                                      "VALID",
                                      self.is_train,
                                      config.keep_prob,
                                      scope="xx")
                    if config.share_cnn_weights:
                        tf.get_variable_scope().reuse_variables()
                        qq = multi_conv1d(Acq,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                    else:
                        qq = multi_conv1d(Acq,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="qq")
                    xx = tf.reshape(xx, [-1, M, JX, dco])
                    qq = tf.reshape(qq, [-1, JQ, dco])

            with tf.name_scope("word"):
                Ax = tf.nn.embedding_lookup(word_emb, self.x)  # [N, M, JX, d]
                Aq = tf.nn.embedding_lookup(word_emb, self.q)  # [N, JQ, d]
                self.tensor_dict['x'] = Ax
                self.tensor_dict['q'] = Aq
            if config.use_char_emb:
                xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
                qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
            else:
                xx = Ax
                qq = Aq

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq
        # x_len / q_len hold the true (unpadded) lengths of the context sentences and questions.
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("Encoding"):
            cell = BasicLSTMCell(d, state_is_tuple=True)
            encoding_cell = SwitchableDropoutWrapper(
                cell, self.is_train, input_keep_prob=config.input_keep_prob)

            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             encoding_cell,
                                             encoding_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat([fw_u, bw_u], 2)
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):

            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                ## G
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                cell = BasicLSTMCell(d, state_is_tuple=True)
                first_cell = SwitchableDropoutWrapper(
                    cell,
                    self.is_train,
                    input_keep_prob=config.input_keep_prob)
            # The two bidirectional passes below could be simplified with a multi-layer RNN.
            ## 2 layers of bi rnn
            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat([fw_g0, bw_g0], 3)

            cell = BasicLSTMCell(d, state_is_tuple=True)
            first_cell = SwitchableDropoutWrapper(
                cell, self.is_train, input_keep_prob=config.input_keep_prob)

            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, g0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            ##M
            g1 = tf.concat([fw_g1, bw_g1], 3)

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')

            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])
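            # a1i: attention-weighted summary of g1 (scores from logits), tiled to every (sentence, token) position.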

            cell = BasicLSTMCell(d, state_is_tuple=True)
            M2_operate_cell = SwitchableDropoutWrapper(
                cell, self.is_train, input_keep_prob=config.input_keep_prob)

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                M2_operate_cell,
                M2_operate_cell,
                tf.concat([p0, g1, a1i, g1 * a1i], 3),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            ## M^2
            g2 = tf.concat([fw_g2, bw_g2], 3)

            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            yp = tf.reshape(flat_yp, [-1, M, JX])
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)
            yp2 = tf.reshape(flat_yp2, [-1, M, JX])
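            # yp and yp2 are per-position probability distributions from the two
            # logit layers (span start / end pointers in the usual BiDAF setup).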

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
Beispiel #31
0
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import BasicLSTMCell
from tensorflow.python.ops.rnn_cell import MultiRNNCell

num_units = [128, 64]
cells = [BasicLSTMCell(num_units=n) for n in num_units]
stacked_rnn_cell = MultiRNNCell(cells)
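
# Hypothetical usage sketch (not part of the original example): unrolling the
# stacked cell with tf.nn.dynamic_rnn. batch_size, max_time, and input_dim are
# assumed example values.
batch_size, max_time, input_dim = 32, 20, 50
inputs = tf.placeholder(tf.float32, [batch_size, max_time, input_dim])
initial_state = stacked_rnn_cell.zero_state(batch_size, dtype=tf.float32)
outputs, final_state = tf.nn.dynamic_rnn(
    stacked_rnn_cell, inputs, initial_state=initial_state, dtype=tf.float32)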