def BiRNN(x, weights):
    # Prepare data shape to match `rnn` function requirements.
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, max_length, 1)

    # Define LSTM cells with TensorFlow
    # Forward direction cell
    lstm_fw_cell = rnn.AttentionCellWrapper(
        rnn.BasicLSTMCell(dims, forget_bias=1.0), max_length)
    # Backward direction cell
    lstm_bw_cell = rnn.AttentionCellWrapper(
        rnn.BasicLSTMCell(dims, forget_bias=1.0), max_length)

    # Get LSTM cell output
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                 dtype=tf.float32)
    print("BiLSTM lengths: ", len(outputs))

    # Return the last output of the RNN inner loop
    return outputs[-1]
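# Hypothetical usage sketch for BiRNN above (assumed shapes and names, not from the
# original project; same `tf`/`rnn` imports as the snippets here): the function relies
# on module-level `max_length` and `dims`, and its last output can be projected to
# class logits with an external weight matrix.
n_input, n_classes = 28, 10
max_length, dims = 28, 128
x_ph = tf.placeholder(tf.float32, [None, max_length, n_input])
weights = {'out': tf.Variable(tf.random_normal([2 * dims, n_classes]))}
last_output = BiRNN(x_ph, weights)               # shape: (batch_size, 2 * dims)
logits = tf.matmul(last_output, weights['out'])  # shape: (batch_size, n_classes)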
def _build(self):
    """Get feed forward step, loss function, and optimizer for RNN"""
    # Define input layers
    self.sentences = tf.placeholder(tf.int32, [None, None])
    self.sentence_lengths = tf.placeholder(tf.int32, [None])
    self.train_marginals = tf.placeholder(tf.float32, [None])
    self.keep_prob = tf.placeholder(tf.float32)

    # Seeds
    s = self.seed
    s1, s2, s3, s4 = [None] * 4 if s is None else [s + i for i in range(4)]

    # Embedding layer
    emb_var = tf.Variable(self._embedding_init(s1))
    embedding = tf.concat([tf.zeros([1, self.dim]), emb_var], axis=0)
    inputs = tf.nn.embedding_lookup(embedding, self.sentences)

    # Build RNN graph
    batch_size = tf.shape(self.sentences)[0]
    rand_name = "RNN_{0}".format(random.randint(0, 1e12))  # Obscene hack
    init = tf.contrib.layers.xavier_initializer(seed=s2)
    with tf.variable_scope(rand_name, reuse=False, initializer=init):
        # Build RNN cells
        fw_cell = self.cell(self.dim)
        bw_cell = self.cell(self.dim)
        # Add attention if needed
        if self.attn:
            fw_cell = rnn.AttentionCellWrapper(fw_cell, self.attn,
                                               state_is_tuple=True)
            bw_cell = rnn.AttentionCellWrapper(bw_cell, self.attn,
                                               state_is_tuple=True)
        # Construct RNN
        initial_state_fw = fw_cell.zero_state(batch_size, tf.float32)
        initial_state_bw = bw_cell.zero_state(batch_size, tf.float32)
        rnn_out, _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, inputs,
            sequence_length=self.sentence_lengths,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            time_major=False)

    # Get potentials
    potentials = get_bi_rnn_output(rnn_out, self.dim, self.sentence_lengths)

    # Compute activation
    potentials_dropout = tf.nn.dropout(potentials, self.keep_prob, seed=s3)
    W = tf.Variable(tf.random_normal((2 * self.dim, 1), stddev=SD, seed=s4))
    b = tf.Variable(0., dtype=tf.float32)
    h_dropout = tf.squeeze(tf.matmul(potentials_dropout, W)) + b

    # Noise-aware loss
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=self.train_marginals, logits=h_dropout))

    # Backprop trainer
    self.train_fn = tf.train.AdamOptimizer(self.lr).minimize(self.loss)

    # Get prediction
    self.prediction = tf.nn.sigmoid(h_dropout)
def rnn_estimator(x, y):
    """RNN estimator with target predictor function on top."""
    x = input_op_fn(x)
    if cell_type == 'rnn':
        cell_fn = nn.rnn_cell.BasicRNNCell
    elif cell_type == 'gru':
        cell_fn = nn.rnn_cell.GRUCell
    elif cell_type == 'lstm':
        cell_fn = nn.rnn_cell.BasicLSTMCell
    else:
        raise ValueError('cell_type {} is not supported.'.format(cell_type))
    # TODO: state_is_tuple=False is deprecated
    if bidirectional:
        # forward direction cell
        fw_cell = cell_fn(rnn_size)
        bw_cell = cell_fn(rnn_size)
        # attach attention cells if specified
        if attn_length is not None:
            fw_cell = contrib_rnn.AttentionCellWrapper(
                fw_cell, attn_length=attn_length, attn_size=attn_size,
                attn_vec_size=attn_vec_size, state_is_tuple=False)
            bw_cell = contrib_rnn.AttentionCellWrapper(
                bw_cell, attn_length=attn_length, attn_size=attn_size,
                attn_vec_size=attn_vec_size, state_is_tuple=False)
        rnn_fw_cell = nn.rnn_cell.MultiRNNCell([fw_cell] * num_layers)
        # backward direction cell
        rnn_bw_cell = nn.rnn_cell.MultiRNNCell([bw_cell] * num_layers)
        # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
        _, encoding = bidirectional_rnn(rnn_fw_cell, rnn_bw_cell, x,
                                        dtype=dtypes.float32,
                                        sequence_length=sequence_length,
                                        initial_state_fw=initial_state,
                                        initial_state_bw=initial_state)
    else:
        rnn_cell = cell_fn(rnn_size)
        if attn_length is not None:
            rnn_cell = contrib_rnn.AttentionCellWrapper(
                rnn_cell, attn_length=attn_length, attn_size=attn_size,
                attn_vec_size=attn_vec_size, state_is_tuple=False)
        cell = nn.rnn_cell.MultiRNNCell([rnn_cell] * num_layers)
        _, encoding = nn.rnn(cell, x, dtype=dtypes.float32,
                             sequence_length=sequence_length,
                             initial_state=initial_state)
    return target_predictor_fn(encoding, y)
def attn_rnn_cell():
    return contrib_rnn.AttentionCellWrapper(
        rnn_cell(), attn_length=attn_length, attn_size=attn_size,
        attn_vec_size=attn_vec_size, state_is_tuple=False)
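# Hypothetical surrounding context for attn_rnn_cell above (all names below are
# assumptions, not from the original source): a base cell factory plus the attention
# hyperparameters the factory closes over, with the wrapped cells stacked and unrolled.
# Because the wrapper is built with state_is_tuple=False, the stacked cell uses
# concatenated (non-tuple) states.
attn_length, attn_size, attn_vec_size, num_layers = 16, 64, 64, 2
rnn_cell = lambda: contrib_rnn.GRUCell(64)               # assumed base cell factory
inputs = tf.placeholder(tf.float32, [None, None, 64])    # (batch, time, features)
cell = contrib_rnn.MultiRNNCell([attn_rnn_cell() for _ in range(num_layers)],
                                state_is_tuple=False)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)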
def make_rnn_cell(rnn_layer_sizes,
                  dropout_keep_prob=1.0,
                  attn_length=0,
                  base_cell=contrib_rnn.BasicLSTMCell,
                  residual_connections=False):
    cells = []
    for i in range(len(rnn_layer_sizes)):
        cell = base_cell(rnn_layer_sizes[i])
        if attn_length and not cells:
            # Add attention wrapper to first layer.
            cell = contrib_rnn.AttentionCellWrapper(cell, attn_length,
                                                    state_is_tuple=True)
        if residual_connections:
            cell = contrib_rnn.ResidualWrapper(cell)
            if i == 0 or rnn_layer_sizes[i] != rnn_layer_sizes[i - 1]:
                cell = contrib_rnn.InputProjectionWrapper(cell, rnn_layer_sizes[i])
        cell = contrib_rnn.DropoutWrapper(cell, output_keep_prob=dropout_keep_prob)
        cells.append(cell)

    cell = contrib_rnn.MultiRNNCell(cells)
    return cell
def lstm_cell(lstm_unit=256):
    cell = tf.nn.rnn_cell.LSTMCell(num_units=lstm_unit)
    cell = rnn.AttentionCellWrapper(
        cell=cell, attn_length=self._attention_length, state_is_tuple=True)
    cell = tf.nn.rnn_cell.DropoutWrapper(
        cell=cell, input_keep_prob=self._keep_prob)
    return cell
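# Hypothetical continuation (assumed, not part of the original class): lstm_cell above
# reads self._attention_length and self._keep_prob, so it would typically be called
# inside the same class when stacking layers before unrolling.
# self._inputs is an assumed (batch, time, features) tensor.
stacked = tf.nn.rnn_cell.MultiRNNCell([lstm_cell(256) for _ in range(2)])
outputs, state = tf.nn.dynamic_rnn(stacked, self._inputs, dtype=tf.float32)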
def bidirectional_LSTM(x, n_hidden, return_seq=False, attention=0, cell='LSTM'):
    # maybe x.shape = (batch_size, seq_len, dim)
    # change x to list of (batch_size, dim)
    #x = tf.unstack(x, None, 1)

    if cell == 'GRU':
        cell_forward = rnn.GRUCell(n_hidden)
        cell_backward = rnn.GRUCell(n_hidden)
    if cell == 'LSTM':
        cell_forward = rnn.LSTMCell(n_hidden)
        cell_backward = rnn.LSTMCell(n_hidden)
    if cell == 'TF-LSTM':
        cell_forward = rnn.TimeFreqLSTMCell(num_units=n_hidden, feature_size=3,
                                            frequency_skip=1)
        cell_backward = rnn.TimeFreqLSTMCell(num_units=n_hidden, feature_size=3,
                                             frequency_skip=1)
    if cell == 'Grid-LSTM':
        cell_forward = rnn.GridLSTMCell(n_hidden, num_frequency_blocks=[5])
        cell_backward = rnn.GridLSTMCell(n_hidden, num_frequency_blocks=[5])

    if attention == 0:
        pass
    else:
        cell_forward = rnn.AttentionCellWrapper(cell_forward, attn_length=attention)
        cell_backward = rnn.AttentionCellWrapper(cell_backward, attn_length=attention)

    h, _, _ = rnn.static_bidirectional_rnn(cell_forward, cell_backward, x,
                                           dtype=tf.float32)

    if return_seq:
        return h
    else:
        return h[-1]
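# Hypothetical call sketch for bidirectional_LSTM above (assumed shapes and names):
# static_bidirectional_rnn consumes a Python list of per-timestep tensors, so a
# (batch, seq_len, dim) input is unstacked along the time axis first.
seq_len, dim = 20, 50
x_ph = tf.placeholder(tf.float32, [None, seq_len, dim])
x_list = tf.unstack(x_ph, seq_len, axis=1)        # list of (batch, dim) tensors
last_h = bidirectional_LSTM(x_list, n_hidden=64, attention=10, cell='LSTM')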
def inference(x, n_in=None, n_time=None, n_hidden=None, n_out=None,
              keep_prob=None):
    def weight_variable(shape, name='W'):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial, name=name)

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial)

    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_in])
    x = tf.split(x, n_time, 0)

    with tf.name_scope('RNN'):
        cell_forward = rnn.GRUCell(n_hidden)
        cell_forward = rnn.AttentionCellWrapper(cell_forward, attn_length=14)
        cell_backward = rnn.GRUCell(n_hidden)
        cell_backward = rnn.AttentionCellWrapper(cell_backward, attn_length=14)

        h, _, _ = rnn.static_bidirectional_rnn(cell_forward, cell_backward, x,
                                               dtype=tf.float32)
        h = h[-1]
        h = tf.nn.dropout(h, keep_prob)

    with tf.name_scope('fc_NN'):
        W = weight_variable([n_hidden * 2, n_hidden], name='W')
        b = bias_variable([n_hidden])
        h = tf.nn.elu(tf.layers.batch_normalization(tf.matmul(h, W) + b))
        h = tf.nn.dropout(h, keep_prob)

        Wo = weight_variable([n_hidden, n_out], name='Wo')
        bo = bias_variable([n_out])
        y = tf.nn.softmax(tf.layers.batch_normalization(tf.matmul(h, Wo) + bo))

    W_list = [W, Wo]
    return y, W_list
def RNN(x, weights, biases):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, timesteps, 1)

    # Define an LSTM cell with TensorFlow, wrapped with attention
    #lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    lstm_cell = rnn.AttentionCellWrapper(
        cell=rnn.BasicLSTMCell(num_hidden, forget_bias=1.0), attn_length=10)

    # Get LSTM cell output
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using the last output of the RNN inner loop
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
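# Hypothetical setup for the RNN function above (the original defines these elsewhere;
# all values here are assumptions): an MNIST-style rows-as-timesteps input plus an
# output projection, the classic static_rnn example layout.
timesteps, num_input, num_hidden, num_classes = 28, 28, 128, 10
X = tf.placeholder(tf.float32, [None, timesteps, num_input])
weights = {'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))}
biases = {'out': tf.Variable(tf.random_normal([num_classes]))}
logits = RNN(X, weights, biases)   # with default attn_size the output stays num_hidden wide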
def make_rnn_cell(rnn_layer_sizes,
                  dropout_keep_prob=1.0,
                  attn_length=0,
                  base_cell=contrib_rnn.BasicLSTMCell,
                  residual_connections=False):
    """Makes a RNN cell from the given hyperparameters.

    Args:
      rnn_layer_sizes: A list of integer sizes (in units) for each layer of
          the RNN.
      dropout_keep_prob: The float probability to keep the output of any given
          sub-cell.
      attn_length: The size of the attention vector.
      base_cell: The base tf.contrib.rnn.RNNCell to use for sub-cells.
      residual_connections: Whether or not to use residual connections (via
          tf.contrib.rnn.ResidualWrapper).

    Returns:
      A tf.contrib.rnn.MultiRNNCell based on the given hyperparameters.
    """
    cells = []
    for i in range(len(rnn_layer_sizes)):
        cell = base_cell(rnn_layer_sizes[i])
        if attn_length and not cells:
            # Add attention wrapper to first layer.
            cell = contrib_rnn.AttentionCellWrapper(cell, attn_length,
                                                    state_is_tuple=True)
        if residual_connections:
            cell = contrib_rnn.ResidualWrapper(cell)
            if i == 0 or rnn_layer_sizes[i] != rnn_layer_sizes[i - 1]:
                cell = contrib_rnn.InputProjectionWrapper(cell, rnn_layer_sizes[i])
        cell = contrib_rnn.DropoutWrapper(cell, output_keep_prob=dropout_keep_prob)
        cells.append(cell)

    cell = contrib_rnn.MultiRNNCell(cells)
    return cell
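# Hypothetical usage of make_rnn_cell above (assumed hyperparameters and input shape,
# not from the original project): two 128-unit layers with attention on the first
# layer, then unrolled with dynamic_rnn.
cell = make_rnn_cell([128, 128], dropout_keep_prob=0.5, attn_length=40)
inputs = tf.placeholder(tf.float32, [None, None, 38])   # (batch, time, features), assumed
initial_state = cell.zero_state(tf.shape(inputs)[0], tf.float32)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state)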
def infer(x, y, batch_size, is_training,
          num_input_digits=None, num_output_digits=None,
          num_hidden=None, num_out=None):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial)

    # Encoder.
    encoder = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    encoder = rnn.AttentionCellWrapper(encoder, num_input_digits,
                                       state_is_tuple=True)

    state = encoder.zero_state(batch_size, tf.float32)
    encoder_outputs = []
    encoder_states = []

    with tf.variable_scope('Encoder'):
        for t in range(num_input_digits):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            # x = (samples, time-steps, features).
            (output, state) = encoder(x[:, t, :], state)
            encoder_outputs.append(output)
            encoder_states.append(state)

    # Decoder.
    decoder = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    decoder = rnn.AttentionCellWrapper(decoder, num_input_digits,
                                       state_is_tuple=True)

    state = encoder_states[-1]
    decoder_outputs = [encoder_outputs[-1]]

    # Pre-define weight and bias of output layer.
    V = weight_variable([num_hidden, num_out])
    c = bias_variable([num_out])
    outputs = []

    with tf.variable_scope('Decoder'):
        for t in range(1, num_output_digits):
            if t > 1:
                tf.get_variable_scope().reuse_variables()

            if is_training is True:
                # y = (samples, time-steps, features).
                (output, state) = decoder(y[:, t - 1, :], state)
            else:
                # Use the previous output as an input.
                linear = tf.matmul(decoder_outputs[-1], V) + c
                out = tf.nn.softmax(linear)
                outputs.append(out)
                out = tf.one_hot(tf.argmax(out, -1), depth=num_output_digits)
                (output, state) = decoder(out, state)

            decoder_outputs.append(output)

    if is_training is True:
        output = tf.reshape(tf.concat(decoder_outputs, axis=1),
                            [-1, num_output_digits, num_hidden])
        linear = tf.einsum('ijk,kl->ijl', output, V) + c
        #linear = tf.matmul(output, V) + c
        return tf.nn.softmax(linear)
    else:
        # Compute the final output.
        linear = tf.matmul(decoder_outputs[-1], V) + c
        out = tf.nn.softmax(linear)
        outputs.append(out)

        output = tf.reshape(tf.concat(outputs, axis=1),
                            [-1, num_output_digits, num_out])
        return output
graph = tf.Graph()
with graph.as_default():
    #------------------------------------construct LSTM------------------------------------------#
    # placeholders
    X_p = tf.placeholder(dtype=tf.float32, shape=(None, TIME_STEPS, 28),
                         name="input_placeholder")
    y_p = tf.placeholder(dtype=tf.float32, shape=(None, 10),
                         name="pred_placeholder")

    # lstm instances
    lstm_forward_1 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS1)
    # add attention (this attention differs slightly from the attention used in
    # encoder-decoder architectures)
    lstm_forward_1 = rnn.AttentionCellWrapper(cell=lstm_forward_1, attn_length=5)
    lstm_forward_2 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS)
    # add attention
    lstm_forward_2 = rnn.AttentionCellWrapper(cell=lstm_forward_2, attn_length=5)
    lstm_forward = rnn.MultiRNNCell(cells=[lstm_forward_1, lstm_forward_2])

    lstm_backward_1 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS1)
    # add attention
    lstm_backward_1 = rnn.AttentionCellWrapper(cell=lstm_backward_1, attn_length=5)
    lstm_backward_2 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS)
    lstm_backward_2 = rnn.AttentionCellWrapper(cell=lstm_backward_2, attn_length=5)
def inference(x, y, n_batch, is_training,
              input_digits=None, output_digits=None,
              n_hidden=None, n_out=None):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial)

    # Encode
    encoder = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    encoder = rnn.AttentionCellWrapper(encoder, input_digits,
                                       state_is_tuple=True)

    state = encoder.zero_state(n_batch, tf.float32)
    encoder_outputs = []
    encoder_states = []

    with tf.variable_scope('Encoder'):
        for t in range(input_digits):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            (output, state) = encoder(x[:, t, :], state)
            encoder_outputs.append(output)
            encoder_states.append(state)

    # Decode
    decoder = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    decoder = rnn.AttentionCellWrapper(decoder, input_digits,
                                       state_is_tuple=True)

    state = encoder_states[-1]
    decoder_outputs = [encoder_outputs[-1]]

    # Pre-define the weights and bias of the output layer
    V = weight_variable([n_hidden, n_out])
    c = bias_variable([n_out])
    outputs = []

    with tf.variable_scope('Decoder'):
        for t in range(1, output_digits):
            if t > 1:
                tf.get_variable_scope().reuse_variables()

            if is_training is True:
                (output, state) = decoder(y[:, t - 1, :], state)
            else:
                # Use the previous output as the next input
                linear = tf.matmul(decoder_outputs[-1], V) + c
                out = tf.nn.softmax(linear)
                outputs.append(out)
                out = tf.one_hot(tf.argmax(out, -1), depth=output_digits)
                (output, state) = decoder(out, state)

            decoder_outputs.append(output)

    if is_training is True:
        output = tf.reshape(tf.concat(decoder_outputs, axis=1),
                            [-1, output_digits, n_hidden])
        linear = tf.einsum('ijk,kl->ijl', output, V) + c
        return tf.nn.softmax(linear)
    else:
        # Compute the final output
        linear = tf.matmul(decoder_outputs[-1], V) + c
        out = tf.nn.softmax(linear)
        outputs.append(out)

        output = tf.reshape(tf.concat(outputs, axis=1),
                            [-1, output_digits, n_out])
        return output
def hierarchy(self, inputs, y_masked, seq_length, scope_name, reuse=False):
    if scope_name == "pw":
        encoder_scope_name = "en_lstm_pw"
        decoder_scope_name = "de_lstm_pw"
    elif scope_name == "pph":
        encoder_scope_name = "en_lstm_pph"
        decoder_scope_name = "de_lstm_pph"
    else:
        encoder_scope_name = "en_lstm_iph"
        decoder_scope_name = "de_lstm_iph"

    with tf.variable_scope(name_or_scope=scope_name, reuse=reuse):
        # forward part
        lstm_forward1 = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
        # add attention (this attention differs slightly from the attention used
        # in encoder-decoder architectures)
        lstm_forward1 = rnn.AttentionCellWrapper(cell=lstm_forward1, attn_length=5)
        lstm_forward2 = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
        # add attention
        lstm_forward2 = rnn.AttentionCellWrapper(cell=lstm_forward2, attn_length=5)
        lstm_forward = rnn.MultiRNNCell(cells=[lstm_forward1, lstm_forward2])
        # dropout
        lstm_forward = rnn.DropoutWrapper(
            cell=lstm_forward,
            input_keep_prob=self.input_keep_prob_p,
            output_keep_prob=self.output_keep_prob_p)

        # backward part
        lstm_backward1 = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
        # add attention
        lstm_backward1 = rnn.AttentionCellWrapper(cell=lstm_backward1, attn_length=5)
        lstm_backward2 = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
        # add attention
        lstm_backward2 = rnn.AttentionCellWrapper(cell=lstm_backward2, attn_length=5)
        lstm_backward = rnn.MultiRNNCell(cells=[lstm_backward1, lstm_backward2])
        # dropout
        lstm_backward = rnn.DropoutWrapper(
            cell=lstm_backward,
            input_keep_prob=self.input_keep_prob_p,
            output_keep_prob=self.output_keep_prob_p)

        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_forward,
            cell_bw=lstm_backward,
            inputs=inputs,
            sequence_length=seq_length,
            dtype=tf.float32,
            scope=decoder_scope_name)

        # shape of each direction: [batch_size, max_time, cell.output_size]
        outputs_forward = outputs[0]
        outputs_backward = outputs[1]

        # concat final outputs: [batch_size, max_time, cell_fw.output_size * 2]
        final_outputs = tf.concat(values=[outputs_forward, outputs_backward], axis=2)

        # shape of h: [batch_size * max_time, hidden_units * 2]
        h = tf.reshape(tensor=final_outputs, shape=(-1, self.hidden_units_num * 2))

        # fully-connected dropout
        h = tf.nn.dropout(x=h, keep_prob=self.keep_prob_p)

        # fully connected layer (projection)
        weight = tf.get_variable(
            name="Weight",
            shape=(self.hidden_units_num * 2, self.class_num),
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        bias = tf.get_variable(
            name="Bias",
            shape=(self.class_num,),
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())

        # logits: [batch_size * max_time, 2]
        #logits = tf.nn.elu(features=tf.matmul(h, weight) + bias)
        logits = tf.matmul(h, weight) + bias

        # logits in normal shape: [batch_size, max_time_steps, 2]
        logits_normal = tf.reshape(tensor=logits,
                                   shape=(-1, self.max_sentence_size, self.class_num),
                                   name="logits_normal")

        # masked logits: [seq_len1 + seq_len2 + ... + seq_lenn, 2]
        logits_masked = tf.boolean_mask(tensor=logits_normal,
                                        mask=self.mask,
                                        name="logits_masked")
        #print("logits_masked.shape", logits_masked.shape)

        # softmax
        prob_masked = tf.nn.softmax(logits=logits_masked, axis=-1,
                                    name="prob_pw_masked")
        #print("prob_masked.shape", prob_masked.shape)

        # prediction
        # pred: [batch_size * max_time,]
        pred = tf.cast(tf.argmax(logits, 1), tf.int32, name="pred")

        # pred in normal shape: [batch_size, max_time]
        pred_normal = tf.reshape(tensor=pred,
                                 shape=(-1, self.max_sentence_size),
                                 name="pred_normal")

        # one-hot the pred_normal: [batch_size, max_time, class_num]
        pred_normal_one_hot = tf.one_hot(indices=pred_normal,
                                         depth=self.class_num,
                                         name="pred_normal_one_hot")

        # masked pred: [seq_len1 + seq_len2 + ...,]
        pred_masked = tf.boolean_mask(tensor=pred_normal,
                                      mask=self.mask,
                                      name="pred_masked")

        # loss
        loss = tf.losses.sparse_softmax_cross_entropy(
            labels=y_masked,
            logits=logits_masked) + tf.contrib.layers.l2_regularizer(self.lambda_pw)(weight)

        return loss, prob_masked, pred, pred_masked, pred_normal_one_hot
#------------------------------------construct GRU-------------------------------------------#
# placeholders
X_p = tf.placeholder(dtype=tf.float32, shape=(None, TIME_STEPS, 28),
                     name="input_placeholder")
y_p = tf.placeholder(dtype=tf.float32, shape=(None, 10),
                     name="pred_placeholder")

# gru instances
gru_forward_1 = tf.nn.rnn_cell.GRUCell(
    num_units=HIDDEN_UNITS1,
    kernel_initializer=initializers.xavier_initializer(),
    bias_initializer=tf.initializers.random_normal())
gru_forward_1 = rnn.AttentionCellWrapper(cell=gru_forward_1, attn_length=5)

gru_forward_2 = tf.nn.rnn_cell.GRUCell(
    num_units=HIDDEN_UNITS,
    kernel_initializer=initializers.xavier_initializer(),
    bias_initializer=tf.initializers.random_normal())
gru_forward_2 = rnn.AttentionCellWrapper(cell=gru_forward_2, attn_length=5)

gru_forward = rnn.MultiRNNCell(cells=[gru_forward_1, gru_forward_2])

gru_backward_1 = tf.nn.rnn_cell.GRUCell(
    num_units=HIDDEN_UNITS1,
    kernel_initializer=initializers.xavier_initializer(),
    bias_initializer=tf.initializers.random_normal())
gru_backward_1 = rnn.AttentionCellWrapper(cell=gru_backward_1, attn_length=5)
def __init__(self, sequence_length, embedding_size, previous_component,
             num_layers, bidirectional, attn_length, attn_size, attn_vec_size):
    """
    Args:
        num_layers: The number of layers of the rnn model.
        bidirectional: boolean, whether this is a bidirectional rnn.
        sequence_length: If sequence_length is provided, dynamic calculation is
            performed. This saves computational time when unrolling past the max
            sequence length. Required for bidirectional RNNs.
        initial_state: An initial state for the RNN. This must be a tensor of
            appropriate type and shape [batch_size x cell.state_size].
        attn_length: integer, the size of the attention vector attached to rnn cells.
        attn_size: integer, the size of an attention window attached to rnn cells.
        attn_vec_size: integer, the number of convolutional features calculated on
            the attention state and the size of the hidden layer built from the
            base cell state.
    """
    x = previous_component.embedded_expanded
    n_nodes = embedding_size

    if bidirectional:
        # forward direction cell
        fw_cell = rnn.GRUCell(n_nodes)
        bw_cell = rnn.GRUCell(n_nodes)
        # attach attention cells if specified
        if attn_length is not None:
            fw_cell = rnn.AttentionCellWrapper(fw_cell,
                                               attn_length=attn_length,
                                               attn_size=attn_size,
                                               attn_vec_size=attn_vec_size,
                                               state_is_tuple=False)
            bw_cell = rnn.AttentionCellWrapper(bw_cell,
                                               attn_length=attn_length,
                                               attn_size=attn_size,
                                               attn_vec_size=attn_vec_size,
                                               state_is_tuple=False)
        rnn_fw_cell = rnn.MultiRNNCell([fw_cell] * num_layers,
                                       state_is_tuple=False)
        # backward direction cell
        rnn_bw_cell = rnn.MultiRNNCell([bw_cell] * num_layers,
                                       state_is_tuple=False)
        outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=rnn_fw_cell,
            cells_bw=rnn_bw_cell,
            inputs=x,
            dtype=tf.dtypes.float32,
            sequence_length=sequence_length)
        self.last_layer = outputs
    else:
        rnn_cell = rnn.GRUCell(n_nodes)
        if attn_length is not None:
            rnn_cell = rnn.AttentionCellWrapper(
                rnn_cell, attn_length=attn_length, attn_size=attn_size,
                attn_vec_size=attn_vec_size, state_is_tuple=False)
        cell = rnn.MultiRNNCell([rnn_cell] * num_layers, state_is_tuple=False)
        outputs, state = rnn.static_rnn(cell, x, dtype=tf.dtypes.float32,
                                        sequence_length=sequence_length)
        self.last_layer = outputs