def add_lstm(self, inputs, i, name, backward=False):
    prev_init = tf.zeros([2, tf.shape(inputs)[1], self.opts.units])  # [2, batch_size, num_units]
    #prev_init = tf.zeros([2, 100, self.opts.units]) # [2, batch_size, num_units]
    if i == 0:
        inputs_dim = self.inputs_dim
    else:
        inputs_dim = self.opts.units * 2  ## concat after each layer
    weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim, self.opts.units, tf.shape(inputs)[1], self.hidden_prob)
    if backward:
        ## backward: reset states after zero paddings
        non_paddings = tf.transpose(self.weight, [1, 0])  ## [batch_size, seq_len] => [seq_len, batch_size]
        non_paddings = tf.reverse(non_paddings, [0])
        cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights, backward=backward), [inputs, non_paddings], prev_init)
    else:
        cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights), inputs, prev_init)
    ## cell_hidden: [seq_len, 2, batch_size, units]
    h = tf.unstack(cell_hidden, 2, axis=1)[1]  # [seq_len, batch_size, units]
    return h
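## Illustrative sketch (not part of the model): a minimal NumPy analogue of the
## tf.scan recurrence above, showing how the backward pass keeps the carried
## (c, h) state at zero while the reversed sequence is still in its padding
## region. The gate math is a plain LSTM cell with assumed weight names
## (W, U, b); it only mirrors the shapes and the masking trick, not the exact
## `lstm` / `get_lstm_weights` implementation used by this class.
def _sketch_backward_scan(inputs, non_paddings, W, U, b):
    """inputs: [seq_len, batch, inputs_dim], already reversed in time;
    non_paddings: [seq_len, batch], 1.0 for real tokens and 0.0 for padding;
    W: [inputs_dim, 4*units], U: [units, 4*units], b: [4*units]."""
    import numpy as np
    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))
    units = U.shape[0]
    batch = inputs.shape[1]
    c = np.zeros((batch, units))
    h = np.zeros((batch, units))
    outputs = []
    for x_t, mask_t in zip(inputs, non_paddings):
        z = x_t.dot(W) + h.dot(U) + b               # [batch, 4*units]
        i_g, f_g, o_g, g = np.split(z, 4, axis=1)
        c_new = sigmoid(f_g) * c + sigmoid(i_g) * np.tanh(g)
        h_new = sigmoid(o_g) * np.tanh(c_new)
        m = mask_t[:, None]                         # reset state on padding steps
        c = m * c_new
        h = m * h_new
        outputs.append(h)
    return np.stack(outputs)                        # [seq_len, batch, units]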
def add_forward_path(self, forward_inputs_tensor, backward_embeddings, reuse=False):
    batch_size = tf.shape(forward_inputs_tensor)[1]
    prev_init = [tf.zeros([2*self.opts.num_layers, batch_size, self.opts.units]), tf.zeros([batch_size], tf.int32), 0, tf.zeros([batch_size, self.loader.nb_tags])]
    ## We need the following memory states (list of four elements):
    ## 1. LSTM cell and h memories for each layer: [2*num_layers, batch_size, num_units]
    ## 2. Previous predictions (stag_idx): [batch_size]
    ## 3. Time step for referencing the backward path: int
    ## In addition, though it's not a memory state, we also carry projected_outputs for the loss calculation: [batch_size, outputs_dim]
    name = 'Forward'
    ## Define all the necessary weights for the recursion
    lstm_weights_list = []
    for i in xrange(self.opts.num_layers):
        if i == 0:
            inputs_dim = self.inputs_dim + self.opts.lm
        else:
            inputs_dim = self.opts.units
        lstm_weights_list.append(get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim, self.opts.units, batch_size, self.hidden_prob, 0, reuse))
    self.add_stag_embedding_mat()
    self.add_stag_dropout_mat(batch_size)
    self.add_lstm_dropout_mats(batch_size)
    all_states = tf.scan(lambda prev, x: self.add_one_forward(prev, x, lstm_weights_list, backward_embeddings), forward_inputs_tensor, prev_init)
    all_predictions = all_states[1]  # [seq_len, batch_size]
    all_predictions = tf.transpose(all_predictions, perm=[1, 0])  # [batch_size, seq_len]
    all_projected_outputs = all_states[3]  # [seq_len, batch_size, outputs_dim]
    all_projected_outputs = tf.transpose(all_projected_outputs, perm=[1, 0, 2])  # [batch_size, seq_len, outputs_dim]
    return all_predictions, all_projected_outputs
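## Illustrative sketch (not part of the model): the greedy decoding pattern that
## add_one_forward is scanned over, reduced to NumPy. At each step the previous
## prediction is embedded and concatenated to the word-level features before the
## recurrent update; the argmax over the projected logits becomes the carried
## prediction for the next step. `step_fn`, `embed_mat`, and `proj` are assumed
## placeholders, not the actual class members.
def _sketch_greedy_decode(features, embed_mat, step_fn, proj):
    """features: [seq_len, batch, feat_dim]; embed_mat: [nb_tags, lm_dim];
    step_fn(state, x) -> (new_state, h) with h: [batch, units]; proj: [units, nb_tags]."""
    import numpy as np
    batch = features.shape[1]
    state = None
    prev_pred = np.zeros(batch, dtype=np.int64)     # previous stag indices
    predictions, logits = [], []
    for x_t in features:
        prev_emb = embed_mat[prev_pred]             # [batch, lm_dim]
        step_in = np.concatenate([x_t, prev_emb], axis=1)
        state, h = step_fn(state, step_in)
        out_t = h.dot(proj)                         # [batch, nb_tags]
        prev_pred = out_t.argmax(axis=1)
        predictions.append(prev_pred)
        logits.append(out_t)
    ## same transposes as above: time-major -> batch-major
    return np.stack(predictions).T, np.transpose(np.stack(logits), (1, 0, 2))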
def add_forward_path_mt(self, forward_cells, forward_hs, forward_inputs_tensor):
    ## forward_cells, forward_hs: lists of num_layers tensors, each [n, b, d]
    ## forward_inputs_tensor: [n, b, d]
    shape = tf.shape(forward_inputs_tensor)
    seq_len, batch_size = shape[0], shape[1]  ## n, b
    #batch_size = tf.shape(forward_cells)[1]
    prev_cell_hiddens = [tf.stack([forward_cell[-1], forward_h[-1]], 0) for forward_cell, forward_h in zip(forward_cells, forward_hs)]
    prev_cell_hiddens = tf.concat(prev_cell_hiddens, 0)  # [2*num_layers, b, d]
    prev_init = [prev_cell_hiddens, tf.zeros([batch_size], tf.int32), tf.zeros([batch_size, self.loader.nb_tags])]
    #prev_init = [tf.zeros([2*self.opts.num_layers, batch_size, self.opts.units]), tf.zeros([batch_size], tf.int32), tf.zeros([batch_size, self.loader.nb_tags])]
    ## We need the following memory states (list of three elements):
    ## 1. LSTM cell and h memories for each layer: [2*num_layers, batch_size, num_units]
    ## 2. Previous predictions (stag_idx): [batch_size]
    ## In addition, though it's not a memory state, we also carry projected_outputs for the loss calculation: [batch_size, outputs_dim]
    name = 'Forward'
    ## Define all the necessary weights for the recursion
    lstm_weights_list = []
    for i in xrange(self.opts.num_layers):
        if i == 0:
            inputs_dim = self.inputs_dim
        else:
            inputs_dim = self.opts.units
        lstm_weights_list.append(get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim, self.opts.units, batch_size, self.hidden_prob, 0, reuse=True))
    lstm_weights_list[0] = get_decoder_weights(lstm_weights_list[0], '{}_LSTM_layer{}'.format(name, 0), self.opts.lm, self.opts.units)
    self.add_stag_embedding_mat()
    self.add_stag_dropout_mat(batch_size)
    all_states = tf.scan(lambda prev, x: self.add_one_forward_mt(prev, x, lstm_weights_list, forward_inputs_tensor), forward_inputs_tensor, prev_init)
    all_predictions = all_states[1]  # [seq_len, batch_size]
    all_predictions = tf.transpose(all_predictions, perm=[1, 0])  # [batch_size, seq_len]
    all_projected_outputs = all_states[2]  # [seq_len, batch_size, outputs_dim]
    all_projected_outputs = tf.transpose(all_projected_outputs, perm=[1, 0, 2])  # [batch_size, seq_len, outputs_dim]
    return all_predictions, all_projected_outputs
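## Illustrative sketch (not part of the model): how the decoder's initial state
## above is assembled from the encoder's per-layer memories. Each layer
## contributes its last-time-step cell and hidden vectors, stacked into a
## [2, batch, units] pair, and the pairs are concatenated layer by layer.
def _sketch_seed_decoder_state(forward_cells, forward_hs):
    """forward_cells, forward_hs: lists (one per layer) of [seq_len, batch, units] arrays."""
    import numpy as np
    pairs = [np.stack([c[-1], h[-1]], axis=0)       # [2, batch, units]
             for c, h in zip(forward_cells, forward_hs)]
    return np.concatenate(pairs, axis=0)            # [2*num_layers, batch, units]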
def add_forward_beam_path(self, forward_inputs_tensor, backward_embeddings, beam_size):
    batch_size = tf.shape(forward_inputs_tensor)[1]  ## batch_size = self.batch_size = b
    prev_init = [tf.zeros([2, batch_size, self.opts.num_layers*self.opts.units]), tf.zeros([batch_size], tf.int32), 0, tf.zeros([batch_size, 1]), tf.zeros([batch_size], tf.int32)]
    ## We need the following memory states (list of five elements):
    ## 1. LSTM cell and h memories for each layer: [2, batch_size, units*num_layers]
    ## 2. Previous predictions (stag_idx): [batch_size] ## notice the difference between beam and greedy here
    ## 3. Time step for referencing the backward path: int
    ## 4. For beam search, we also need to memorize scores: [batch_size, 1]
    ## 5. Backpointers (parent indices) for predictions: [batch_size]
    name = 'Forward'
    ## Define all the necessary weights for the recursion
    lstm_weights_list = []
    for i in xrange(self.opts.num_layers):
        if i == 0:
            inputs_dim = self.inputs_dim + self.opts.lm
        else:
            inputs_dim = self.opts.units
        lstm_weights_list.append(get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim, self.opts.units, batch_size, self.hidden_prob, beam_size))
    self.add_stag_embedding_mat()
    #self.add_stag_dropout_mat(batch_size) ## unnecessary since we are only testing
    ## The first iteration has only self.batch_size configurations. For the sake of tf.scan, compute it separately.
    first_inputs = tf.squeeze(tf.slice(forward_inputs_tensor, [0, 0, 0], [1, -1, -1]), axis=0)  ## [batch_size, inputs_dim+lm]
    forward_inputs_tensor = tf.slice(forward_inputs_tensor, [1, 0, 0], [-1, -1, -1])
    prev_init = self.add_one_beam_forward(prev_init, first_inputs, lstm_weights_list, backward_embeddings, beam_size, batch_size)
    first_predictions = tf.expand_dims(prev_init[1], 0)  ## [1, batch_size]
    first_scores = tf.expand_dims(prev_init[3], 0)  ## [1, batch_size, 1]
    ## Now, move on to the second iteration and beyond
    initial_shape = tf.shape(forward_inputs_tensor)
    forward_inputs_tensor = tf.reshape(tf.tile(forward_inputs_tensor, [1, 1, beam_size]), [initial_shape[0], initial_shape[1]*beam_size, initial_shape[2]])
    ## [seq_len-1, self.batch_size, inputs_dim] -> [seq_len-1, self.batch_size*beam_size (B*b), inputs_dim]
    batch_size = initial_shape[1]*beam_size  ## Bb
    ## no backprop for testing; reuse projection weights from the first iteration
    all_states = tf.scan(lambda prev, x: self.add_one_beam_forward(prev, x, lstm_weights_list, backward_embeddings, beam_size, batch_size, True), forward_inputs_tensor, prev_init, back_prop=False)
    back_pointers = all_states[4]  # [seq_len-1, batch_size]
    back_pointers = tf.transpose(back_pointers, perm=[1, 0])
    all_predictions = all_states[1]  # [seq_len-1, batch_size]
    all_predictions = tf.concat([first_predictions, all_predictions], 0)
    all_predictions = tf.transpose(all_predictions, perm=[1, 0])  # [batch_size, seq_len]
    all_scores = all_states[3]  # [seq_len-1, batch_size, 1]
    all_scores = tf.concat([first_scores, all_scores], 0)
    all_scores = tf.squeeze(all_scores, axis=2)
    all_scores = tf.transpose(all_scores, perm=[1, 0])
    return all_predictions, all_scores, back_pointers
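## Illustrative sketch (not part of the model): recovering one tag sequence from
## the back_pointers and all_predictions returned above, by following parent
## indices backwards from the best-scoring item at the last step. The layout
## assumed here is one row per beam item, with back_pointers[i, t] giving the
## parent beam index (at step t) of item i at step t+1; the exact layout depends
## on add_one_beam_forward, so treat this as a shape-level guide only.
def _sketch_follow_back_pointers(predictions, back_pointers, best_last):
    """predictions: [beam, seq_len] array; back_pointers: [beam, seq_len-1] array
    of parent indices; best_last: index of the best beam item at the final step."""
    seq_len = predictions.shape[1]
    tags = [predictions[best_last, seq_len - 1]]
    beam_idx = best_last
    for t in xrange(seq_len - 2, -1, -1):           # walk backwards through time
        beam_idx = back_pointers[beam_idx, t]
        tags.append(predictions[beam_idx, t])
    tags.reverse()
    return tags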
def add_lstm(self, inputs, i, name):  ## need to access c
    prev_init = tf.zeros([2, tf.shape(inputs)[1], self.opts.units])  # [2, batch_size, num_units]
    #prev_init = tf.zeros([2, 100, self.opts.units]) # [2, batch_size, num_units]
    if i == 0:
        inputs_dim = self.inputs_dim
    else:
        inputs_dim = self.opts.units
    weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim, self.opts.units, tf.shape(inputs)[1], self.hidden_prob)
    cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights), inputs, prev_init)
    ## cell_hidden: [seq_len, 2, batch_size, units]
    c = tf.unstack(cell_hidden, 2, axis=1)[0]  # [seq_len, batch_size, units]
    h = tf.unstack(cell_hidden, 2, axis=1)[1]  # [seq_len, batch_size, units]
    return c, h