Example #1
def add_lstm(self, inputs, i, name, backward=False):
    prev_init = tf.zeros([2, tf.shape(inputs)[1],
                          self.opts.units])  # [2, batch_size, num_units]
    #prev_init = tf.zeros([2, 100, self.opts.units])  # [2, batch_size, num_units]
    if i == 0:
        inputs_dim = self.inputs_dim
    else:
        inputs_dim = self.opts.units * 2  ## concatenated after each layer
    weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i),
                               inputs_dim, self.opts.units,
                               tf.shape(inputs)[1], self.hidden_prob)
    if backward:
        ## backward: reset states after zero paddings
        non_paddings = tf.transpose(
            self.weight,
            [1, 0])  ## [batch_size, seq_len] => [seq_len, batch_size]
        non_paddings = tf.reverse(non_paddings, [0])
        cell_hidden = tf.scan(
            lambda prev, x: lstm(prev, x, weights, backward=backward),
            [inputs, non_paddings], prev_init)
    else:
        cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights),
                              inputs, prev_init)
    #cell_hidden: [seq_len, 2, batch_size, units]
    h = tf.unstack(cell_hidden, 2,
                   axis=1)[1]  #[seq_len, batch_size, units]
    return h
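The recurrence pattern above is easy to miss at first read: tf.scan threads a [2, batch_size, units] state tensor (stacked cell and hidden memories) through every time step, and the per-step lstm function must return a tensor of that same shape. Below is a minimal, self-contained sketch of that pattern with a toy step function standing in for lstm and get_lstm_weights (both defined elsewhere in this project); the shapes mirror the example above.

import tensorflow as tf  # TF 1.x, matching the examples here

def toy_step(prev, x):
    # prev: [2, batch, units] stacked (c, h); x: [batch, units]
    c, h = tf.unstack(prev, 2, axis=0)
    new_c = c + x           # stand-in for the LSTM cell update
    new_h = tf.tanh(new_c)  # stand-in for the gated output
    return tf.stack([new_c, new_h], axis=0)

inputs = tf.placeholder(tf.float32, [None, None, 8])  # [seq_len, batch, units]
init = tf.zeros([2, tf.shape(inputs)[1], 8])
states = tf.scan(toy_step, inputs, init)              # [seq_len, 2, batch, units]
h = tf.unstack(states, 2, axis=1)[1]                  # [seq_len, batch, units]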
Example #2
def add_forward_path(self, forward_inputs_tensor, backward_embeddings, reuse=False):
    batch_size = tf.shape(forward_inputs_tensor)[1]
    prev_init = [tf.zeros([2 * self.opts.num_layers, batch_size, self.opts.units]),
                 tf.zeros([batch_size], tf.int32),
                 0,
                 tf.zeros([batch_size, self.loader.nb_tags])]
    ## We need the following memory states (a list of four elements):
    ## 1. LSTM cell and h memories for each layer: [2*num_layers, batch_size, num_units]
    ## 2. Previous predictions (stag_idx): [batch_size]
    ## 3. Time step for referencing the backward path: int
    ## 4. Though not a memory state, we also carry projected_outputs for the
    ##    loss calculation: [batch_size, outputs_dim]
    name = 'Forward'
    ## Define all the necessary weights for the recursion
    lstm_weights_list = []
    for i in range(self.opts.num_layers):
        if i == 0:
            inputs_dim = self.inputs_dim + self.opts.lm
        else:
            inputs_dim = self.opts.units
        lstm_weights_list.append(
            get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim,
                             self.opts.units, batch_size, self.hidden_prob,
                             0, reuse))
    self.add_stag_embedding_mat()
    self.add_stag_dropout_mat(batch_size)
    self.add_lstm_dropout_mats(batch_size)

    all_states = tf.scan(
        lambda prev, x: self.add_one_forward(prev, x, lstm_weights_list,
                                             backward_embeddings),
        forward_inputs_tensor, prev_init)
    all_predictions = all_states[1]  # [seq_len, batch_size]
    all_predictions = tf.transpose(all_predictions, perm=[1, 0])  # [batch_size, seq_len]
    all_projected_outputs = all_states[3]  # [seq_len, batch_size, outputs_dim]
    all_projected_outputs = tf.transpose(all_projected_outputs, perm=[1, 0, 2])  # [batch_size, seq_len, outputs_dim]
    return all_predictions, all_projected_outputs
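Note that tf.scan also accepts a Python list as the recurrent state, which is how add_forward_path carries its four memory elements together; the step function just has to return a list with matching shapes and dtypes. A minimal sketch of that multi-element pattern (toy shapes, hypothetical stand-in step function):

import tensorflow as tf  # TF 1.x

def step(prev, x):
    memories, prev_pred, t, _ = prev
    new_memories = memories + tf.reduce_mean(x)              # stand-in state update
    new_pred = tf.argmax(x, axis=-1, output_type=tf.int32)   # greedy pick
    return [new_memories, new_pred, t + 1, x]                # x stands in for the projection

xs = tf.random_normal([5, 3, 7])  # [seq_len, batch, nb_tags]
init = [tf.zeros([2, 3, 4]), tf.zeros([3], tf.int32), 0, tf.zeros([3, 7])]
all_states = tf.scan(step, xs, init)
predictions = tf.transpose(all_states[1], [1, 0])  # [batch, seq_len]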
Example #3
def add_forward_path_mt(self, forward_cells, forward_hs,
                        forward_inputs_tensor):
    ## forward_cells: list of num_layers elements, each [n, b, d]
    ## forward_inputs_tensor: [n, b, d]
    shape = tf.shape(forward_inputs_tensor)
    seq_len, batch_size = shape[0], shape[1]  ## n, b
    #batch_size = tf.shape(forward_cells)[1]
    prev_cell_hiddens = [
        tf.stack([forward_cell[-1], forward_h[-1]], 0)
        for forward_cell, forward_h in zip(forward_cells, forward_hs)
    ]
    prev_cell_hiddens = tf.concat(prev_cell_hiddens,
                                  0)  # [2*num_layers, b, d]
    prev_init = [
        prev_cell_hiddens,
        tf.zeros([batch_size], tf.int32),
        tf.zeros([batch_size, self.loader.nb_tags])
    ]
    #prev_init = [tf.zeros([2*self.opts.num_layers, batch_size, self.opts.units]), tf.zeros([batch_size], tf.int32), tf.zeros([batch_size, self.loader.nb_tags])]
    ## We need the following memory states (a list of three elements):
    ## 1. LSTM cell and h memories for each layer: [2*num_layers, batch_size, num_units]
    ## 2. Previous predictions (stag_idx): [batch_size]
    ## 3. Though not a memory state, we also carry projected_outputs for the
    ##    loss calculation: [batch_size, outputs_dim]
    name = 'Forward'
    ## Define all the necessary weights for the recursion
    lstm_weights_list = []
    for i in range(self.opts.num_layers):
        if i == 0:
            inputs_dim = self.inputs_dim
        else:
            inputs_dim = self.opts.units
        lstm_weights_list.append(
            get_lstm_weights('{}_LSTM_layer{}'.format(name, i),
                             inputs_dim,
                             self.opts.units,
                             batch_size,
                             self.hidden_prob,
                             0,
                             reuse=True))
    lstm_weights_list[0] = get_decoder_weights(
        lstm_weights_list[0], '{}_LSTM_layer{}'.format(name, 0),
        self.opts.lm, self.opts.units)
    self.add_stag_embedding_mat()
    self.add_stag_dropout_mat(batch_size)

    all_states = tf.scan(
        lambda prev, x: self.add_one_forward_mt(prev, x, lstm_weights_list,
                                                forward_inputs_tensor),
        forward_inputs_tensor, prev_init)
    all_predictions = all_states[1]  # [seq_len, batch_size]
    all_predictions = tf.transpose(all_predictions,
                                   perm=[1, 0])  # [batch_size, seq_len]
    all_projected_outputs = all_states[2]  # [seq_len, batch_size, outputs_dim]
    all_projected_outputs = tf.transpose(
        all_projected_outputs,
        perm=[1, 0, 2])  # [batch_size, seq_len, outputs_dim]
    return all_predictions, all_projected_outputs
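The hand-off at the top of add_forward_path_mt is worth unpacking: the last time step of each encoder layer's cell and hidden sequences is stacked into a [2, b, d] pair per layer, and the pairs are concatenated into the [2*num_layers, b, d] block the decoder scan expects as its initial memory. A small stand-alone sketch (hypothetical toy sizes):

import tensorflow as tf

num_layers, n, b, d = 2, 5, 3, 4  # hypothetical sizes
forward_cells = [tf.random_normal([n, b, d]) for _ in range(num_layers)]
forward_hs = [tf.random_normal([n, b, d]) for _ in range(num_layers)]
pairs = [tf.stack([c[-1], h[-1]], 0)  # last time step of each layer: [2, b, d]
         for c, h in zip(forward_cells, forward_hs)]
prev_cell_hiddens = tf.concat(pairs, 0)  # [2*num_layers, b, d]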
Example #4
def add_forward_beam_path(self, forward_inputs_tensor, backward_embeddings, beam_size):
    batch_size = tf.shape(forward_inputs_tensor)[1]  ## batch_size = self.batch_size = b
    prev_init = [tf.zeros([2, batch_size, self.opts.num_layers * self.opts.units]),
                 tf.zeros([batch_size], tf.int32),
                 0,
                 tf.zeros([batch_size, 1]),
                 tf.zeros([batch_size], tf.int32)]
    ## We need the following memory states (a list of five elements):
    ## 1. LSTM cell and h memories for each layer: [2, batch_size, units*num_layers]
    ## 2. Previous predictions (stag_idx): [batch_size] ## note the difference between beam and greedy here
    ## 3. Time step for referencing the backward path: int
    ## 4. For beam search, we also need to memorize scores: [batch_size, 1]
    ## 5. Backpointers (parent indices) for predictions: [batch_size]
    name = 'Forward'
    ## Define all the necessary weights for the recursion
    lstm_weights_list = []
    for i in range(self.opts.num_layers):
        if i == 0:
            inputs_dim = self.inputs_dim + self.opts.lm
        else:
            inputs_dim = self.opts.units
        lstm_weights_list.append(
            get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim,
                             self.opts.units, batch_size, self.hidden_prob,
                             beam_size))
    self.add_stag_embedding_mat()
    #self.add_stag_dropout_mat(batch_size)  ## unnecessary since we are only testing
    ## The first iteration has only self.batch_size configurations, so compute
    ## it outside tf.scan and feed the result in as the initializer.
    first_inputs = tf.squeeze(tf.slice(forward_inputs_tensor, [0, 0, 0], [1, -1, -1]),
                              axis=0)  ## [batch_size, inputs_dim+lm]
    forward_inputs_tensor = tf.slice(forward_inputs_tensor, [1, 0, 0], [-1, -1, -1])
    prev_init = self.add_one_beam_forward(prev_init, first_inputs, lstm_weights_list,
                                          backward_embeddings, beam_size, batch_size)
    first_predictions = tf.expand_dims(prev_init[1], 0)  ## [1, batch_size]
    first_scores = tf.expand_dims(prev_init[3], 0)  ## [1, batch_size, 1]

    ## Now move on to the second iteration and beyond
    initial_shape = tf.shape(forward_inputs_tensor)
    forward_inputs_tensor = tf.reshape(
        tf.tile(forward_inputs_tensor, [1, 1, beam_size]),
        [initial_shape[0], initial_shape[1] * beam_size, initial_shape[2]])
    ## [seq_len-1, self.batch_size, inputs_dim] -> [seq_len-1, self.batch_size*beam_size (B*b), inputs_dim]
    batch_size = initial_shape[1] * beam_size  ## Bb
    all_states = tf.scan(
        lambda prev, x: self.add_one_beam_forward(prev, x, lstm_weights_list,
                                                  backward_embeddings, beam_size,
                                                  batch_size, True),
        forward_inputs_tensor, prev_init,
        back_prop=False)  ## no backprop since this is test-time only; reuse the projection weights from the first iteration
    back_pointers = all_states[4]  # [seq_len-1, batch_size]
    back_pointers = tf.transpose(back_pointers, perm=[1, 0])
    all_predictions = all_states[1]  # [seq_len-1, batch_size]
    all_predictions = tf.concat([first_predictions, all_predictions], 0)
    all_predictions = tf.transpose(all_predictions, perm=[1, 0])  # [batch_size, seq_len]
    all_scores = all_states[3]  # [seq_len-1, batch_size, 1]
    all_scores = tf.concat([first_scores, all_scores], 0)
    all_scores = tf.squeeze(all_scores, axis=2)
    all_scores = tf.transpose(all_scores, perm=[1, 0])
    return all_predictions, all_scores, back_pointers
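One detail in add_forward_beam_path that is easy to misread is the tile-then-reshape: tiling along the feature axis and then reshaping over the batch axis lays out the beam copies contiguously next to each sentence, giving the [seq_len-1, batch*beam, dim] layout the scan expects. A tiny check of that layout:

import tensorflow as tf

x = tf.constant([[[1., 2.], [3., 4.]]])  # [seq_len=1, batch=2, dim=2]
beam_size = 3
shape = tf.shape(x)
expanded = tf.reshape(tf.tile(x, [1, 1, beam_size]),
                      [shape[0], shape[1] * beam_size, shape[2]])
# Along axis 1 the rows are now s0, s0, s0, s1, s1, s1: each sentence is
# followed by its beam_size copies, matching the B*b ordering above.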
Example #5
def add_lstm(self, inputs, i, name):  ## variant that also exposes the cell state c
    prev_init = tf.zeros([2, tf.shape(inputs)[1],
                          self.opts.units])  # [2, batch_size, num_units]
    #prev_init = tf.zeros([2, 100, self.opts.units])  # [2, batch_size, num_units]
    if i == 0:
        inputs_dim = self.inputs_dim
    else:
        inputs_dim = self.opts.units
    weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i),
                               inputs_dim, self.opts.units,
                               tf.shape(inputs)[1], self.hidden_prob)
    cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights), inputs,
                          prev_init)
    #cell_hidden: [seq_len, 2, batch_size, units]
    c, h = tf.unstack(cell_hidden, 2, axis=1)  # each [seq_len, batch_size, units]
    return c, h
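Example #5 mirrors Example #1 but also returns the cell state, which is exactly what add_forward_path_mt in Example #3 consumes through forward_cells and forward_hs. A hedged sketch of how an encoder loop might collect those lists (hypothetical driver code; the layer wiring is an assumption):

def encode(model, inputs, num_layers, name='Encoder'):
    # Collect per-layer cell and hidden sequences in the shape
    # add_forward_path_mt expects (lists of [seq_len, batch, units]).
    cells, hs = [], []
    for i in range(num_layers):
        c, h = model.add_lstm(inputs, i, name)  # Example #5's variant
        cells.append(c)
        hs.append(h)
        inputs = h  # assumption: feed h to the next layer
    return cells, hs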