def __init__(self, hidden_size, keep_prob):
    self.hidden_size = hidden_size  # this should be 2 * self.FLAGS.hidden_size
    self.keep_prob = keep_prob
    # Each direction gets half the hidden size; integer division keeps
    # LSTMCell's num_units an int under Python 3.
    self.fwd = DropoutWrapper(rnn_cell.LSTMCell(self.hidden_size // 2),
                              input_keep_prob=self.keep_prob)
    self.back = DropoutWrapper(rnn_cell.LSTMCell(self.hidden_size // 2),
                               input_keep_prob=self.keep_prob)
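A minimal usage sketch for these two half-size cells (assuming TF 1.x; `emb` and `seq_lens` are illustrative names for a batch-major [batch, time, dim] input and the per-example lengths): they are typically handed to tf.nn.bidirectional_dynamic_rnn, and concatenating the per-direction outputs restores the full hidden_size.

import tensorflow as tf

def encode(encoder, emb, seq_lens):
    # emb: [batch, time, input_dim]; seq_lens: [batch] (hypothetical names).
    (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
        encoder.fwd, encoder.back, emb,
        sequence_length=seq_lens, dtype=tf.float32)
    # Each direction emits hidden_size // 2 units; concatenation gives hidden_size.
    return tf.concat([out_fw, out_bw], axis=2)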
def compute_states(self, emb):
    def unpack_sequence(tensor):
        # Split a batch-major [batch, time, dim] tensor into a time-major list.
        return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

    with tf.variable_scope("Composition",
                           initializer=tf.contrib.layers.xavier_initializer(),
                           regularizer=tf.contrib.layers.l2_regularizer(self.reg)):
        cell_fw = rnn_cell.LSTMCell(self.hidden_dim)
        cell_bw = rnn_cell.LSTMCell(self.hidden_dim)
        cell_fw = rnn_cell.DropoutWrapper(cell_fw,
                                          output_keep_prob=self.dropout,
                                          input_keep_prob=self.dropout)
        cell_bw = rnn_cell.DropoutWrapper(cell_bw,
                                          output_keep_prob=self.dropout,
                                          input_keep_prob=self.dropout)
        outputs, _, _ = rnn.bidirectional_rnn(cell_fw, cell_bw,
                                              unpack_sequence(emb),
                                              sequence_length=self.lngths,
                                              dtype=tf.float32)
        # Mean-pool the per-timestep outputs into a sentence representation;
        # outputs past each sequence length are zero, so dividing by the true
        # lengths gives a proper mean.
        sum_out = tf.reduce_sum(tf.stack(outputs), [0])
        sent_rep = tf.div(sum_out, tf.expand_dims(tf.to_float(self.lngths), 1))
        final_state = sent_rep
        return final_state
def RNN(x, is_training, weights, biases):
    # Reshape batch-major input into a list of `n_time_step` tensors of
    # shape (batch_size, n_input), as required by rnn.rnn.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_time_step, x)

    lstm_cell_1 = rnn_cell.LSTMCell(n_hidden_1, forget_bias=0.8)
    lstm_cell_2 = rnn_cell.LSTMCell(n_hidden_2, forget_bias=0.8)
    # Apply dropout only at training time.
    if is_training and keep_prob < 1:
        lstm_cell_1 = rnn_cell.DropoutWrapper(lstm_cell_1, output_keep_prob=keep_prob)
        lstm_cell_2 = rnn_cell.DropoutWrapper(lstm_cell_2, output_keep_prob=keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell_1, lstm_cell_2])

    output, states = rnn.rnn(cell, x, dtype=tf.float32)
    # Project the last timestep's output to logits (maybe a softmax).
    return tf.matmul(output[-1], weights['out']) + biases['out']
def bidir_lstm(x):
    with tf.name_scope('Weights'):
        # Reshape to (n_steps * batch_size, n_input), then split into a list
        # of 'n_steps' tensors of shape (batch_size, n_input).
        x = tf.reshape(x, [-1, model.num_features])
        x = tf.split(0, model.max_timesteps, x)
        weights_out1 = tf.Variable(
            tf.truncated_normal([2, num_hidden], stddev=np.sqrt(1. / num_hidden)),
            name='weights')
        biases_out1 = tf.Variable(tf.zeros(num_hidden), name='biases')
    with tf.name_scope('LSTM'):
        forward = rnn_cell.LSTMCell(num_hidden, use_peepholes=True, forget_bias=1.0)
        backward = rnn_cell.LSTMCell(num_hidden, use_peepholes=True, forget_bias=1.0)
    with tf.name_scope('BidirectionalRNN'):
        bidirectional_h1, _, _ = bidirectional_rnn(forward, backward, x,
                                                   dtype=tf.float32)
        # Separate the concatenated forward/backward outputs per timestep.
        bd_h1s = [tf.reshape(t, [batch_size, 2, num_hidden]) for t in bidirectional_h1]
    with tf.name_scope('logits'):
        weights_class = tf.Variable(
            tf.truncated_normal([num_hidden, model.num_classes],
                                stddev=np.sqrt(1. / num_hidden)),
            name='weights')
        biases_class = tf.Variable(tf.zeros([model.num_classes]))
        # Sum the two directions through a learned per-direction weighting,
        # then project to class logits.
        out_h1 = [tf.reduce_sum(tf.mul(t, weights_out1), reduction_indices=1) + biases_out1
                  for t in bd_h1s]
        logits = [tf.matmul(t, weights_class) + biases_class for t in out_h1]
    logits3d = tf.pack(logits)
    return logits3d
def _createStackBidirectionalDynamicRNN(self,
                                        use_gpu,
                                        use_shape,
                                        use_state_tuple,
                                        initial_states_fw=None,
                                        initial_states_bw=None,
                                        scope=None):
    self.layers = [2, 3]
    input_size = 5
    batch_size = 2
    max_length = 8
    initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=self._seed)
    sequence_length = array_ops.placeholder(dtypes.int64)
    self.cells_fw = [
        rnn_cell.LSTMCell(num_units, input_size, initializer=initializer,
                          state_is_tuple=False) for num_units in self.layers
    ]
    self.cells_bw = [
        rnn_cell.LSTMCell(num_units, input_size, initializer=initializer,
                          state_is_tuple=False) for num_units in self.layers
    ]
    inputs = max_length * [
        array_ops.placeholder(
            dtypes.float32,
            shape=(batch_size, input_size) if use_shape else (None, input_size))
    ]
    inputs_c = array_ops.stack(inputs)
    inputs_c = array_ops.transpose(inputs_c, [1, 0, 2])
    outputs, st_fw, st_bw = contrib_rnn.stack_bidirectional_dynamic_rnn(
        self.cells_fw,
        self.cells_bw,
        inputs_c,
        initial_states_fw=initial_states_fw,
        initial_states_bw=initial_states_bw,
        dtype=dtypes.float32,
        sequence_length=sequence_length,
        scope=scope)
    # Outputs has shape (batch_size, max_length, 2 * layers[-1]).
    output_shape = [None, max_length, 2 * self.layers[-1]]
    if use_shape:
        output_shape[0] = batch_size
    self.assertAllEqual(outputs.get_shape().as_list(), output_shape)
    input_value = np.random.randn(batch_size, input_size)
    return input_value, inputs, outputs, st_fw, st_bw, sequence_length
def bilstm_layer(self, inputs):
    # Bidirectional LSTM layer for feature extraction.
    with tf.variable_scope("BiLSTM"):
        fw_cell = rnn_cell.LSTMCell(self.params.word_hidden_dim,
                                    use_peepholes=True,
                                    initializer=self.initializer())
        bw_cell = rnn_cell.LSTMCell(self.params.word_hidden_dim,
                                    use_peepholes=True,
                                    initializer=self.initializer())
        length64 = tf.cast(self.lengths, tf.int64)
        forward_output, _ = tf.nn.dynamic_rnn(fw_cell,
                                              inputs,
                                              dtype=tf.float32,
                                              sequence_length=self.lengths,
                                              scope="fw")
        # Run the backward cell over the reversed sequence, then reverse its
        # outputs back so they align with the forward outputs.
        backward_output, _ = tf.nn.dynamic_rnn(
            bw_cell,
            tf.reverse_sequence(inputs, length64, seq_dim=1),
            dtype=tf.float32,
            sequence_length=self.lengths,
            scope="bw")
        backward_output = tf.reverse_sequence(backward_output, length64, seq_dim=1)
        # Concat forward and backward outputs into a 2 * hidden_size vector.
        outputs = tf.concat(2, [forward_output, backward_output])
        lstm_features = tf.reshape(outputs, [-1, self.params.word_hidden_dim * 2])
        return lstm_features
def __init__(self, hidden_size, keep_prob):
    """
    Inputs:
      hidden_size: int. Hidden size of the RNN
      keep_prob: Tensor containing a single scalar that is the keep probability (for dropout)
    """
    self.hidden_size = hidden_size
    self.keep_prob = keep_prob
    # Two stacked bidirectional layers; each cell gets input dropout.
    self.rnn_cell_fw_layer1 = DropoutWrapper(
        rnn_cell.LSTMCell(self.hidden_size), input_keep_prob=self.keep_prob)
    self.rnn_cell_bw_layer1 = DropoutWrapper(
        rnn_cell.LSTMCell(self.hidden_size), input_keep_prob=self.keep_prob)
    self.rnn_cell_fw_layer2 = DropoutWrapper(
        rnn_cell.LSTMCell(self.hidden_size), input_keep_prob=self.keep_prob)
    self.rnn_cell_bw_layer2 = DropoutWrapper(
        rnn_cell.LSTMCell(self.hidden_size), input_keep_prob=self.keep_prob)
def __init__(self, dim_image, dim_embed, dim_hidden, batch_size, n_lstm_steps,
             n_words, enc_timesteps, bias_init_vector=None):
    self.dim_image = np.int(dim_image)
    self.dim_embed = np.int(dim_embed)
    self.dim_hidden = np.int(dim_hidden)
    self.batch_size = np.int(batch_size)
    self.n_lstm_steps = np.int(n_lstm_steps)
    self.n_words = np.int(n_words)
    self.enc_timesteps = np.int(enc_timesteps)

    # Keep the word embedding on the CPU.
    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(
            tf.random_uniform([n_words, dim_embed], -0.1, 0.1), name='Wemb')
    self.bemb = self.init_bias(dim_embed, name='bemb')

    # Forward and backward LSTMs (input_keep_prob=1 makes the dropout a no-op).
    self.lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
    self.lstm = rnn_cell.DropoutWrapper(self.lstm, input_keep_prob=1)
    self.lstm = rnn_cell.MultiRNNCell([self.lstm])
    self.back_lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
    self.back_lstm = rnn_cell.DropoutWrapper(self.back_lstm, input_keep_prob=1)
    self.back_lstm = rnn_cell.MultiRNNCell([self.back_lstm])

    self.encode_img_W = tf.Variable(
        tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1), name='encode_img_W')
    self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')
    self.embed_word_W = tf.Variable(
        tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')
    if bias_init_vector is not None:
        self.embed_word_b = tf.Variable(
            bias_init_vector.astype(np.float32), name='embed_word_b')
    else:
        self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
def BiRNN(x, weights, biases):
    # Prepare data shape to match `bidirectional_rnn` function requirements.
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    try:
        outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                              dtype=tf.float32)
    except Exception:  # Old TensorFlow version only returns outputs, not states
        outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                        dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
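A hedged driver for BiRNN, assuming the module-level constants (n_steps, n_input, n_hidden, n_classes) the snippet reads; the values below are illustrative. The output projection is [2 * n_hidden, n_classes] because the last bidirectional output concatenates both directions.

n_steps, n_input, n_hidden, n_classes = 28, 28, 128, 10  # illustrative values

x = tf.placeholder(tf.float32, [None, n_steps, n_input])
weights = {'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}

logits = BiRNN(x, weights, biases)  # shape: [batch_size, n_classes]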
def inference(images, eval=False):
    NUM_HIDDEN = 128
    W = _variable_with_weight_decay('weight', [2 * NUM_HIDDEN, NUM_CLASSES], 0.001, 0.01)
    b = _variable_with_weight_decay('bias', [NUM_CLASSES], 0.001, 0)
    s = images.get_shape()
    n_batches, n_steps, n_features = int(s[0]), int(s[1]), int(s[2])
    # Reshape into a list of `n_steps` tensors of shape (batch_size, n_features).
    inputs = tf.reshape(images, [-1, n_features])
    inputs = tf.split(0, n_steps, inputs)
    fw_cell = rnn_cell.LSTMCell(NUM_HIDDEN, forget_bias=1.0, state_is_tuple=True)
    bw_cell = rnn_cell.LSTMCell(NUM_HIDDEN, forget_bias=1.0, state_is_tuple=True)
    try:
        outputs, _, _ = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype=tf.float32)
    except Exception:
        # Older TensorFlow versions only return the outputs. (The original
        # passed an undefined `x` here; `inputs` is what is meant.)
        outputs = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype=tf.float32)
    logits = tf.matmul(outputs[-1], W) + b
    return logits
def __init__(self, hidden_size, keep_prob, cell_type="gru", scope="encoder"):
    """
    Inputs:
      hidden_size: int. Hidden size of the RNN
      keep_prob: Tensor containing a single scalar that is the keep probability (for dropout)
    """
    self.hidden_size = hidden_size
    self.keep_prob = keep_prob
    if cell_type == "gru":
        rnn_cell_fw = rnn_cell.GRUCell(self.hidden_size)
        rnn_cell_bw = rnn_cell.GRUCell(self.hidden_size)
    elif cell_type == "lstm":
        rnn_cell_fw = rnn_cell.LSTMCell(self.hidden_size)
        rnn_cell_bw = rnn_cell.LSTMCell(self.hidden_size)
    else:
        # Note: `assert (False, msg)` asserts a non-empty tuple, which is
        # always truthy; raise instead so bad cell types actually fail.
        raise ValueError("No such cell type for RNN encoder: %s" % cell_type)
    self.rnn_cell_fw = DropoutWrapper(rnn_cell_fw, input_keep_prob=self.keep_prob)
    self.rnn_cell_bw = DropoutWrapper(rnn_cell_bw, input_keep_prob=self.keep_prob)
    self.scope = scope
    logger.info("Encoder created: {} | hidden_size = {}".format(cell_type, hidden_size))
def LSTM_Network(input_, weightsOutH1, weightsClasses, biasesOutH1, biasesClasses):
    #### Network
    forwardH1 = rnn_cell.LSTMCell(nHidden, use_peepholes=True, state_is_tuple=True)
    backwardH1 = rnn_cell.LSTMCell(nHidden, use_peepholes=True, state_is_tuple=True)
    # `input_` must be a list of (batchSize, nFeatures) tensors. (The original
    # referenced an undefined `inputList`; the parameter is what is meant.)
    fbH1, _, _ = bidirectional_rnn(forwardH1, backwardH1, input_,
                                   dtype=tf.float32, scope='BDLSTM_H1')
    # Separate the concatenated forward/backward outputs per timestep.
    fbH1rs = [tf.reshape(t, [batchSize, 2, nHidden]) for t in fbH1]
    # Sum the two directions through a learned per-direction weighting.
    outH1 = [
        tf.reduce_sum(tf.mul(t, weightsOutH1), reduction_indices=1) + biasesOutH1
        for t in fbH1rs
    ]
    logits = [tf.matmul(t, weightsClasses) + biasesClasses for t in outH1]
    #### Optimizing
    logits3d = tf.pack(logits)
    return logits3d
def _createStackBidirectionalRNN(self,
                                 use_gpu,
                                 use_shape,
                                 use_sequence_length,
                                 initial_states_fw=None,
                                 initial_states_bw=None,
                                 scope=None):
    self.layers = [2, 3]
    input_size = 5
    batch_size = 2
    max_length = 8
    initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=self._seed)
    sequence_length = array_ops.placeholder(
        dtypes.int64) if use_sequence_length else None
    self.cells_fw = [
        rnn_cell.LSTMCell(num_units, input_size, initializer=initializer,
                          state_is_tuple=False) for num_units in self.layers
    ]
    self.cells_bw = [
        rnn_cell.LSTMCell(num_units, input_size, initializer=initializer,
                          state_is_tuple=False) for num_units in self.layers
    ]
    inputs = max_length * [
        array_ops.placeholder(
            dtypes.float32,
            shape=(batch_size, input_size) if use_shape else (None, input_size))
    ]
    outputs, state_fw, state_bw = contrib_rnn.stack_bidirectional_rnn(
        self.cells_fw,
        self.cells_bw,
        inputs,
        initial_states_fw,
        initial_states_bw,
        dtype=dtypes.float32,
        sequence_length=sequence_length,
        scope=scope)
    self.assertEqual(len(outputs), len(inputs))
    for out in outputs:
        self.assertAlmostEqual(
            out.get_shape().as_list(),
            [batch_size if use_shape else None, 2 * self.layers[-1]])
    input_value = np.random.randn(batch_size, input_size)
    outputs = array_ops.stack(outputs)
    return input_value, inputs, outputs, state_fw, state_bw, sequence_length
def RNN(x, weight, bias):
    cell1 = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell2 = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell = rnn_cell.MultiRNNCell([cell1, cell2])
    output, state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    # Make the output time-major and take the last timestep.
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    return tf.nn.softmax(tf.matmul(last, weight) + bias, name="pred")
def get_train_model():
    x, y, params = get_training_model()
    inputs = tf.placeholder(tf.float32, [None, None, common.OUTPUT_SHAPE[0]])
    # Here we use sparse_placeholder that will generate a
    # SparseTensor required by ctc_loss op.
    targets = tf.sparse_placeholder(tf.int32)
    # 1d array of size [batch_size]
    seq_len = tf.placeholder(tf.int32, [None])

    # Defining the cells for the forward and backward layer
    forwardH1 = rnn_cell.LSTMCell(common.num_hidden, use_peepholes=True,
                                  state_is_tuple=True)
    backwardH1 = rnn_cell.LSTMCell(common.num_hidden, use_peepholes=True,
                                   state_is_tuple=True)
    # The second output is the previous state and is ignored. (The RNN runs
    # over `inputs`; the original passed `x` here, which does not match the
    # shapes taken from `inputs` below.)
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(forwardH1, backwardH1, inputs,
                                                 seq_len, dtype=tf.float32)
    outputs = tf.concat(2, outputs)

    shape = tf.shape(inputs)
    batch_s, max_timesteps = shape[0], shape[1]
    # Reshaping to apply the same weights over the timesteps
    outputs = tf.reshape(outputs, [-1, 2 * common.num_hidden])
    # Truncated normal with mean 0 and stdev=0.5
    W = tf.Variable(tf.truncated_normal([2 * common.num_hidden, common.num_classes],
                                        stddev=0.5), name="W")
    # Zero initialization
    b = tf.zeros(shape=[common.num_classes], name='b')
    # Doing the affine projection
    logits = tf.matmul(outputs, W) + b
    # Reshaping back to the original shape
    logits = tf.reshape(logits, [batch_s, -1, common.num_classes])
    # Time major
    logits = tf.transpose(logits, (1, 0, 2))
    return logits, inputs, targets, seq_len, W, b
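A sketch of how the returned tensors plug into CTC training and decoding (assuming TF 1.x; not part of the original snippet). `logits` is already time-major, which tf.nn.ctc_loss and the beam-search decoder expect by default.

logits, inputs, targets, seq_len, W, b = get_train_model()

loss = tf.reduce_mean(tf.nn.ctc_loss(targets, logits, seq_len))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, seq_len)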
def __init__(self, seq_length, vocab_size, stack_dimension, batch_size):
    config = tf.ConfigProto(allow_soft_placement=True)
    self.sess = tf.Session(config=config)
    self.seq_length = seq_length
    self.vocab_size = vocab_size
    self.memory_dim = vocab_size

    self.enc_inp = [
        tf.placeholder(tf.float32, shape=(vocab_size, batch_size),
                       name="enc_inp%i" % t) for t in range(seq_length)
    ]
    # Decoder inputs are the encoder inputs shifted, with a zero "GO" slot.
    self.dec_inp = self.enc_inp[:-1] + [
        tf.zeros_like(self.enc_inp[0], dtype=np.float32, name="GO")
    ]

    single_enc_cell = rnn_cell.LSTMCell(self.memory_dim, state_is_tuple=False)
    self.enc_cell = rnn_cell.MultiRNNCell([single_enc_cell] * stack_dimension,
                                          state_is_tuple=True)
    _, encoder_state = rnn.rnn(self.enc_cell, self.enc_inp, dtype=tf.float32)

    single_dec_cell = rnn_cell.LSTMCell(self.memory_dim, state_is_tuple=False)
    self.dec_cell = rnn_cell.MultiRNNCell([single_dec_cell] * stack_dimension,
                                          state_is_tuple=True)
    self.Ws = tf.Variable(tf.random_uniform([self.memory_dim, self.vocab_size], 0, 0.1))
    self.bs = tf.Variable(tf.random_uniform([self.vocab_size], -0.1, 0.1))
    self.dec_outputs, self.dec_state = rnn_decoder(
        self.dec_inp, encoder_state, self.dec_cell,
        self.Ws, self.bs, vocab_size, batch_size, self.memory_dim)

    self.labels = [
        tf.placeholder(tf.float32, [vocab_size, batch_size], name='LABEL%i' % t)
        for t in range(seq_length)
    ]
    self.weights = [
        tf.ones_like(labels_t, dtype=tf.float32) for labels_t in self.labels
    ]
    self.loss = loss(self.labels, self.dec_outputs)
    self.train_op = tf.train.AdamOptimizer(1e-3).minimize(self.loss)
    self.sess.run(tf.initialize_all_variables())
def __init__(self, hidden_size, keep_prob, model_name="RNNModelEncoder"):
    """
    Inputs:
      hidden_size: int. Hidden size of the RNN
      keep_prob: Tensor containing a single scalar that is the keep probability (for dropout)
    """
    self.hidden_size = hidden_size
    self.keep_prob = keep_prob
    self.rnn_cell_fw = rnn_cell.LSTMCell(self.hidden_size)
    self.rnn_cell_fw = DropoutWrapper(self.rnn_cell_fw, input_keep_prob=self.keep_prob)
    self.rnn_cell_bw = rnn_cell.LSTMCell(self.hidden_size)
    self.rnn_cell_bw = DropoutWrapper(self.rnn_cell_bw, input_keep_prob=self.keep_prob)
    self.model_name = model_name
def __init__(self, input_size, output_size):
    """
    Inputs:
      input_size: the dimension of the input states
      output_size: the dimension of the output states for each direction of the BiLSTM.
    """
    self.input_size = input_size
    self.output_size = output_size
    # num_proj projects each cell's output (and recurrent h state) down to
    # output_size, so each direction emits output_size-dim vectors.
    self.rnn_cell_fw = rnn_cell.LSTMCell(num_units=self.input_size,
                                         num_proj=self.output_size)
    self.rnn_cell_bw = rnn_cell.LSTMCell(num_units=self.input_size,
                                         num_proj=self.output_size)
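To see what num_proj buys, a quick check (illustrative sizes; TF 1.x `rnn_cell` assumed): the cell's per-step outputs shrink to the projection size while the internal cell state keeps num_units.

cell = rnn_cell.LSTMCell(num_units=300, num_proj=100)
print(cell.output_size)  # 100: per-step outputs are projected down
print(cell.state_size)   # LSTMStateTuple(c=300, h=100): c keeps num_units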
def inference(inputs, n_input, n_steps, n_hidden, n_classes):
    W = tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))
    b = tf.Variable(tf.random_normal([n_classes]))
    # Reshape into a list of `n_steps` tensors of shape (batch_size, n_input).
    inputs = tf.reshape(inputs, [-1, n_input])
    inputs = tf.split(0, n_steps, inputs)
    fw_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
    bw_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
    try:
        outputs, _, _ = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype=tf.float32)
    except Exception:
        # Older TensorFlow versions only return the outputs. (The original
        # passed an undefined `x` here; `inputs` is what is meant.)
        outputs = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype=tf.float32)
    return tf.matmul(outputs[-1], W) + b
def __init__(self, keep_prob, key_vec_size, value_vec_size):
    """
    Inputs:
      keep_prob: tensor containing a single scalar that is the keep probability (for dropout)
      key_vec_size: size of the key vectors. int
      value_vec_size: size of the value vectors. int
    """
    self.keep_prob = keep_prob
    self.key_vec_size = key_vec_size
    self.value_vec_size = value_vec_size
    # Half-size cells per direction; integer division keeps LSTMCell's
    # num_units an int under Python 3.
    self.rnn_cell_fw = rnn_cell.LSTMCell(value_vec_size // 2, reuse=tf.AUTO_REUSE)
    self.rnn_cell_fw = DropoutWrapper(self.rnn_cell_fw, input_keep_prob=self.keep_prob)
    self.rnn_cell_bw = rnn_cell.LSTMCell(value_vec_size // 2, reuse=tf.AUTO_REUSE)
    self.rnn_cell_bw = DropoutWrapper(self.rnn_cell_bw, input_keep_prob=self.keep_prob)
def BRNN(x, weight, bias):
    cell1_fw = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell2_fw = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell_fw = rnn_cell.MultiRNNCell([cell1_fw, cell2_fw])
    cell1_bw = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell2_bw = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell_bw = rnn_cell.MultiRNNCell([cell1_bw, cell2_bw])
    # `output` is a (forward, backward) pair; output[-1] selects only the
    # backward direction's outputs.
    output, out_states = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, x,
                                                         dtype=tf.float32)
    output = tf.transpose(output[-1], [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    return tf.nn.softmax(tf.matmul(last, weight) + bias, name="pred")
def __init__(self, keep_prob, qn_vec_size, cxt_vec_size):
    """
    Inputs:
      keep_prob: tensor containing a single scalar that is the keep probability (for dropout)
      qn_vec_size: size of the question vectors. int
      cxt_vec_size: size of the context vectors. int
    """
    self.keep_prob = keep_prob
    self.qn_vec_size = qn_vec_size
    self.cxt_vec_size = cxt_vec_size
    # Integer division keeps num_units an int under Python 3.
    self.rnn_cell_fw = rnn_cell.LSTMCell(cxt_vec_size // 2, reuse=tf.AUTO_REUSE)
    self.rnn_cell_fw = DropoutWrapper(self.rnn_cell_fw, input_keep_prob=self.keep_prob)
    self.rnn_cell_bw = rnn_cell.LSTMCell(cxt_vec_size // 2, reuse=tf.AUTO_REUSE)
    self.rnn_cell_bw = DropoutWrapper(self.rnn_cell_bw, input_keep_prob=self.keep_prob)
def build_graph(self):
    input_lens = tf.reduce_sum(self.X_mask_placeholder, axis=1)
    # Concatenate word embeddings, convolved char embeddings, and extra features.
    char_embedding = self.convolve()
    inputs = tf.concat([self.X_placeholder, char_embedding], axis=-1)
    inputs = tf.concat([inputs, self.features], axis=-1)

    # Stack of bidirectional LSTM layers; each layer feeds the next.
    for i in range(0, self.depth):
        lstm_cell_forward = rnn_cell.LSTMCell(self.hidden_size)
        lstm_cell_forward = DropoutWrapper(lstm_cell_forward,
                                           input_keep_prob=self.keep_prob)
        lstm_cell_backward = rnn_cell.LSTMCell(self.hidden_size)
        lstm_cell_backward = DropoutWrapper(lstm_cell_backward,
                                            input_keep_prob=self.keep_prob)
        (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(
            lstm_cell_forward, lstm_cell_backward, inputs, input_lens,
            dtype=tf.float32, scope='layer' + str(i))
        out = tf.concat([fw_out, bw_out], 2)
        out = tf.nn.dropout(out, self.keep_prob)
        inputs = out

    h = tf.contrib.layers.fully_connected(out, num_outputs=self.hidden_size,
                                          activation_fn=tf.nn.relu)
    # Gather the hidden state at the last valid timestep of each sequence.
    rows = tf.range(0, tf.shape(input_lens)[-1])
    indices = tf.subtract(input_lens, tf.ones_like(input_lens))
    indices = tf.nn.relu(indices)
    slicer = tf.stack([rows, indices], axis=1)
    h = tf.gather_nd(h, slicer)

    weights = tf.get_variable("W", shape=[self.hidden_size, self.num_classes],
                              initializer=tf.contrib.layers.xavier_initializer())
    bias = tf.get_variable("b", shape=[self.num_classes],
                           initializer=tf.zeros_initializer())
    logits = tf.nn.xw_plus_b(h, weights, bias, name="logits")
    preds = tf.argmax(logits, 1)
    return logits, preds
def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size,
             dim_image, dim_hidden, max_words_q, vocabulary_size, drop_out_rate):
    self.rnn_size = rnn_size
    self.rnn_layer = rnn_layer
    self.batch_size = batch_size
    self.input_embedding_size = input_embedding_size
    self.dim_image = dim_image
    self.dim_hidden = dim_hidden
    self.max_words_q = max_words_q
    self.vocabulary_size = vocabulary_size
    self.drop_out_rate = drop_out_rate

    # Before-LSTM-embedding
    self.embed_BLSTM_Q_W = tf.Variable(
        tf.random_uniform([self.vocabulary_size, self.input_embedding_size], -0.08, 0.08),
        name='embed_BLSTM_Q_W')
    self.embed_BLSTM_A_W = tf.Variable(
        tf.random_uniform([self.vocabulary_size, self.input_embedding_size], -0.08, 0.08),
        name='embed_BLSTM_A_W')

    # encoder: RNN body (question)
    self.lstm_1_q = rnn_cell.LSTMCell(rnn_size, input_embedding_size,
                                      use_peepholes=True, state_is_tuple=False)
    self.lstm_dropout_1_q = rnn_cell.DropoutWrapper(
        self.lstm_1_q, output_keep_prob=1 - self.drop_out_rate)
    self.lstm_2_q = rnn_cell.LSTMCell(rnn_size, rnn_size,
                                      use_peepholes=True, state_is_tuple=False)
    self.lstm_dropout_2_q = rnn_cell.DropoutWrapper(
        self.lstm_2_q, output_keep_prob=1 - self.drop_out_rate)
    self.stacked_lstm_q = rnn_cell.MultiRNNCell(
        [self.lstm_dropout_1_q, self.lstm_dropout_2_q], state_is_tuple=False)

    # encoder: RNN body (answer)
    self.lstm_1_a = rnn_cell.LSTMCell(rnn_size, input_embedding_size,
                                      use_peepholes=True, state_is_tuple=False)
    self.lstm_dropout_1_a = rnn_cell.DropoutWrapper(
        self.lstm_1_a, output_keep_prob=1 - self.drop_out_rate)
    self.lstm_2_a = rnn_cell.LSTMCell(rnn_size, rnn_size,
                                      use_peepholes=True, state_is_tuple=False)
    self.lstm_dropout_2_a = rnn_cell.DropoutWrapper(
        self.lstm_2_a, output_keep_prob=1 - self.drop_out_rate)
    self.stacked_lstm_a = rnn_cell.MultiRNNCell(
        [self.lstm_dropout_1_a, self.lstm_dropout_2_a], state_is_tuple=False)

    # question-embedding W1
    self.embed_Q_W = tf.Variable(
        tf.random_uniform([2 * rnn_size * rnn_layer, self.dim_hidden], -0.08, 0.08),
        name='embed_Q_W')
    self.embed_Q_b = tf.Variable(tf.random_uniform([self.dim_hidden], -0.08, 0.08),
                                 name='embed_Q_b')
    # Answer-embedding W3
    self.embed_A_W = tf.Variable(
        tf.random_uniform([2 * rnn_size * rnn_layer, self.dim_hidden], -0.08, 0.08),
        name='embed_A_W')
    self.embed_A_b = tf.Variable(tf.random_uniform([self.dim_hidden], -0.08, 0.08),
                                 name='embed_A_b')
    # image-embedding W2
    self.embed_image_W = tf.Variable(
        tf.random_uniform([dim_image, self.dim_hidden], -0.08, 0.08),
        name='embed_image_W')
    self.embed_image_b = tf.Variable(tf.random_uniform([dim_hidden], -0.08, 0.08),
                                     name='embed_image_b')
    # score-embedding W4 (num_output is presumably a module-level constant)
    self.embed_scor_W = tf.Variable(
        tf.random_uniform([dim_hidden, num_output], -0.08, 0.08), name='embed_scor_W')
    self.embed_scor_b = tf.Variable(tf.random_uniform([num_output], -0.08, 0.08),
                                    name='embed_scor_b')
    # QI-embedding W3
    self.embed_QI_W = tf.Variable(
        tf.random_uniform([dim_hidden, dim_hidden], -0.08, 0.08), name='embed_QI_W')
    self.embed_QI_b = tf.Variable(tf.random_uniform([dim_hidden], -0.08, 0.08),
                                  name='embed_QI_b')
def _create_encoder(self, args):
    # Create LSTM portion of network
    lstm = rnn_cell.LSTMCell(args.encoder_size, state_is_tuple=True,
                             initializer=initializers.xavier_initializer())
    self.full_lstm = rnn_cell.MultiRNNCell([lstm] * args.num_encoder_layers,
                                           state_is_tuple=True)
    self.lstm_state = self.full_lstm.zero_state(args.batch_size, tf.float32)

    # Forward pass
    encoder_input = tf.concat(1, [self.states_encode, self.actions_encode])
    output, self.final_state = seq2seq.rnn_decoder([encoder_input],
                                                   self.lstm_state, self.full_lstm)
    output = tf.reshape(tf.concat(1, output), [-1, args.encoder_size])

    # Fully connected layer to latent variable distribution parameters
    W = tf.get_variable("latent_w", [args.encoder_size, 2 * args.z_dim],
                        initializer=initializers.xavier_initializer())
    b = tf.get_variable("latent_b", [2 * args.z_dim])
    logits = tf.nn.xw_plus_b(output, W, b)

    # Separate into mean and logstd
    self.z_mean, self.z_logstd = tf.split(1, 2, logits)
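Downstream, z_mean and z_logstd are typically turned into a latent sample with the reparameterization trick; a minimal sketch under that assumption (not from the original code):

    # Sample z = mean + exp(logstd) * eps with eps ~ N(0, 1).
    eps = tf.random_normal(tf.shape(self.z_mean), 0.0, 1.0, dtype=tf.float32)
    self.z = self.z_mean + tf.exp(self.z_logstd) * eps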
def test_temporal_classification_sequential_tf_rnn(self):
    with self.cached_session():
        np.random.seed(1337)
        (x_train, y_train), _ = testing_utils.get_test_data(
            train_samples=100, test_samples=0, input_shape=(4, 10), num_classes=2)
        y_train = keras.utils.to_categorical(y_train)

        model = keras.models.Sequential()
        model.add(
            keras.layers.RNN(rnn_cell.LSTMCell(5), return_sequences=True,
                             input_shape=x_train.shape[1:]))
        model.add(
            keras.layers.RNN(
                rnn_cell.GRUCell(y_train.shape[-1], activation='softmax',
                                 dtype=dtypes.float32)))
        model.compile(loss='categorical_crossentropy',
                      optimizer=keras.optimizers.Adam(lr=0.1),
                      metrics=['accuracy'])
        history = model.fit(x_train, y_train, epochs=15, batch_size=16,
                            validation_data=(x_train, y_train), verbose=2)
        self.assertGreater(history.history['val_acc'][-1], 0.7)
def testCustomizedAttention(self):
    batch_size = 2
    max_time = 3
    num_units = 2
    memory = constant_op.constant([[[1., 1.], [2., 2.], [3., 3.]],
                                   [[4., 4.], [5., 5.], [6., 6.]]])
    memory_sequence_length = constant_op.constant([3, 2])
    attention_mechanism = wrapper.BahdanauAttention(num_units, memory,
                                                    memory_sequence_length)

    # Sets all returned values to be all ones.
    def _customized_attention(unused_attention_mechanism, unused_cell_output,
                              unused_attention_state, unused_attention_layer):
        """Customized attention.

        Returns:
          attention: `Tensor` of shape [batch_size, num_units], attention output.
          alignments: `Tensor` of shape [batch_size, max_time], sigma value for
            each input memory (prob. function of input keys).
          next_attention_state: A `Tensor` representing the next state for the
            attention.
        """
        attention = array_ops.ones([batch_size, num_units])
        alignments = array_ops.ones([batch_size, max_time])
        next_attention_state = alignments
        return attention, alignments, next_attention_state

    attention_cell = wrapper.AttentionWrapper(
        rnn_cell.LSTMCell(2),
        attention_mechanism,
        attention_layer_size=None,  # don't use attention layer.
        output_attention=False,
        alignment_history=(),
        attention_fn=_customized_attention,
        name='attention')
    self.assertEqual(num_units, attention_cell.output_size)

    initial_state = attention_cell.zero_state(batch_size=2, dtype=dtypes.float32)
    source_input_emb = array_ops.ones([2, 3, 2])
    source_input_length = constant_op.constant([3, 2])

    # 'state' is a tuple of
    # (cell_state, h, attention, alignments, alignment_history, attention_state)
    output, state = rnn.dynamic_rnn(
        attention_cell,
        inputs=source_input_emb,
        sequence_length=source_input_length,
        initial_state=initial_state,
        dtype=dtypes.float32)

    with self.session() as sess:
        sess.run(variables.global_variables_initializer())
        output_value, state_value = sess.run([output, state], feed_dict={})
        self.assertAllEqual(np.array([2, 3, 2]), output_value.shape)
        self.assertAllClose(np.array([[1., 1.], [1., 1.]]), state_value.attention)
        self.assertAllClose(np.array([[1., 1., 1.], [1., 1., 1.]]),
                            state_value.alignments)
        self.assertAllClose(np.array([[1., 1., 1.], [1., 1., 1.]]),
                            state_value.attention_state)
def testLuongScaledDType(self):
    # Test case for GitHub issue 18099
    for dt in [np.float16, np.float32, np.float64]:
        num_units = 128
        encoder_outputs = array_ops.placeholder(dt, shape=[64, None, 256])
        encoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64])
        decoder_inputs = array_ops.placeholder(dt, shape=[64, None, 128])
        decoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64])
        batch_size = 64
        attention_mechanism = wrapper.LuongAttention(
            num_units=num_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_sequence_length,
            scale=True,
            dtype=dt,
        )
        cell = rnn_cell.LSTMCell(num_units)
        cell = wrapper.AttentionWrapper(cell, attention_mechanism)

        helper = helper_py.TrainingHelper(decoder_inputs, decoder_sequence_length)
        my_decoder = basic_decoder.BasicDecoder(
            cell=cell,
            helper=helper,
            initial_state=cell.zero_state(dtype=dt, batch_size=batch_size))
        final_outputs, final_state, _ = decoder.dynamic_decode(my_decoder)

        self.assertTrue(isinstance(final_outputs, basic_decoder.BasicDecoderOutput))
        self.assertEqual(final_outputs.rnn_output.dtype, dt)
        self.assertTrue(isinstance(final_state, wrapper.AttentionWrapperState))
        self.assertTrue(isinstance(final_state.cell_state, rnn_cell.LSTMStateTuple))
def lstm(self):
    """
    Prepare the input shape for the lstm: the original shape is
    (batch_size, seq_size, input_dim), but it must be transformed into a
    list of seq_size tensors, each of shape (batch_size, input_dim).
    Must copy self.X to a new tensor X.
    """
    X = self.X
    # Permute batch_size and seq_size.
    X = tf.transpose(X, [1, 0, 2])
    # Reshape to (seq_size * batch_size, input_dim).
    X = tf.reshape(X, [-1, self.input_dim])
    # Split into a list of seq_size tensors of shape (batch_size, input_dim).
    X = tf.split(X, self.seq_size)
    # Create lstm and add dropout.
    lstm_cell = rnn_cell.LSTMCell(self.hidden_dim, use_peepholes=True)
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                        input_keep_prob=self.keep_prob,
                                        output_keep_prob=self.keep_prob)
    # A list of per-timestep inputs requires the static RNN; dynamic_rnn (as
    # in the original) expects a single batch-major tensor, not a list.
    outputs, states = rnn.static_rnn(lstm_cell, X, dtype=tf.float32)
    # Project the last timestep's output.
    output = tf.matmul(outputs[-1], self.weights) + self.biases
    return output
def benchmarkTfRNNLSTMTraining(self):
    test_configs = self._GetTestConfig()
    for config_name, config in test_configs.items():
        num_layers = config["num_layers"]
        num_units = config["num_units"]
        batch_size = config["batch_size"]
        seq_length = config["seq_length"]

        with ops.Graph().as_default(), ops.device("/gpu:0"):
            inputs = seq_length * [
                array_ops.zeros([batch_size, num_units], dtypes.float32)
            ]
            initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=127)

            # Build a fresh cell per layer: calling `cell()` on an existing
            # LSTMCell instance (as in the original) invokes it as a layer
            # with no arguments rather than constructing a new cell.
            multi_cell = rnn_cell.MultiRNNCell([
                rnn_cell.LSTMCell(num_units=num_units, initializer=initializer,
                                  state_is_tuple=True)
                for _ in range(num_layers)
            ])
            outputs, final_state = core_rnn.static_rnn(multi_cell, inputs,
                                                       dtype=dtypes.float32)
            trainable_variables = ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES)
            gradients = gradients_impl.gradients([outputs, final_state],
                                                 trainable_variables)
            training_op = control_flow_ops.group(*gradients)
            self._BenchmarkOp(
                training_op,
                "tf_rnn_lstm %s %s" % (config_name, self._GetConfigDesc(config)))