def RNN(x, weights, biases):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, timesteps, 1)

    # Define a lstm cell with tensorflow
    lstm_cell1 = rnn.LSTMBlockCell(num_hidden, forget_bias=1.0)
    #lstm_cell = rnn.BasicRNNCell(num_hidden)
    #lstm_cell = rnn.PhasedLSTMCell(num_hidden)
    #lstm_cell2 = rnn.PhasedLSTMCell(num_hidden)
    lstm_cell1 = tf.nn.rnn_cell.DropoutWrapper(lstm_cell1, output_keep_prob=0.75)
    lstm_cell2 = rnn.LSTMBlockCell(num_hidden, forget_bias=1.0, use_peephole=True)
    lstm_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell1, lstm_cell2] * 4)

    # Get lstm cell output
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def bi_lstm_class(input_, n_hidden=256, n_steps=32, n_input=54, num_class=10,
                  name='class_bi_lstm'):
    with tf.variable_scope(name):
        input_x = tf.unstack(input_, n_steps, 1)
        lstm_fw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)
        lstm_bw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

        x = []
        for i in range(n_steps - 1):
            x.append(tf.concat([input_x[i], input_x[i + 1] - input_x[i]], 1))

        try:
            outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell],
                                                        x, dtype=tf.float32)
        except Exception:
            outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell],
                                                  x, dtype=tf.float32)

        h = tf.concat(outputs, 1)
        h, h_w, h_b = linear(h, 1024, 'd_h3_lin', with_w=True)
        h = tf.nn.relu(h)
        h, h_w, h_b = linear(h, num_class, 'd_h4_lin', with_w=True)
        return h
def test_multi_rnn_lstm(self):
    units = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)
    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    cell_0 = rnn.LSTMBlockCell(units)
    cell_1 = rnn.LSTMBlockCell(units)
    cell_2 = rnn.LSTMBlockCell(units)
    cells = rnn.MultiRNNCell([cell_0, cell_1, cell_2], state_is_tuple=True)
    outputs, cell_state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")
    input_names_with_port = ["input_1:0"]
    feed_dict = {"input_1:0": x_val}
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-03, atol=1e-06)
def test_dynamic_bilstm_state_consumed_only(self):
    units = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)
    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # bilstm, no scope
    cell1 = rnn.LSTMBlockCell(units)
    cell2 = rnn.LSTMBlockCell(units)
    _, cell_state = tf.nn.bidirectional_dynamic_rnn(cell1, cell2, x, dtype=tf.float32)
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_lstm_count(g, 1))
def bilstm_filter(input, mask, keep_prob, prefix='lstm', dim=50, is_training=True):
    with tf.variable_scope(name_or_scope=prefix, reuse=tf.AUTO_REUSE):
        sequence = tf.cast(tf.reduce_sum(mask, 1), tf.int32)
        lstm_fw_cell = rnn.LSTMBlockCell(
            dim, forget_bias=1.0)  # initializer=tf.orthogonal_initializer(), state_is_tuple=True
        # back directions
        lstm_bw_cell = rnn.LSTMBlockCell(dim, forget_bias=1.0)
        keep_rate = tf.cond(is_training is not False and keep_prob < 1,
                            lambda: 0.8, lambda: 1.0)
        cell_dp_fw = rnn.DropoutWrapper(cell=lstm_fw_cell, output_keep_prob=keep_rate)
        cell_dp_bw = rnn.DropoutWrapper(cell=lstm_bw_cell, output_keep_prob=keep_rate)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_dp_fw, cell_dp_bw, input, sequence_length=sequence,
            swap_memory=False, dtype=tf.float32)  # batch major
        return outputs
def BiRNN(x):
    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)

    # Define lstm cells with tensorflow
    # Forward direction cell
    #lstm_fw_cell = rnn.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0), self.keep_prob2)  #, use_peepholes=True)
    lstm_fw_cell = rnn.DropoutWrapper(
        rnn.LSTMBlockCell(n_hidden, forget_bias=1.0), self.keep_prob2)
    # Backward direction cell
    #lstm_bw_cell = rnn.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0), self.keep_prob2, use_peepholes=True)
    lstm_bw_cell = rnn.DropoutWrapper(
        rnn.LSTMBlockCell(n_hidden, forget_bias=1.0), self.keep_prob2)

    # Get lstm cell output
    # Older code wrapped this call in try/except because old TensorFlow versions
    # only returned outputs, not states:
    #   outputs, _, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                 dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    #return tf.matmul(outputs[-1], weights['out']) + biases['out']
    return tf.concat(outputs, 2)
def test_attention_wrapper_lstm_encoder(self):
    size = 5
    time_step = 3
    input_size = 4
    attn_size = size
    batch_size = 9

    # shape [batch size, time step, size]
    # attention_state: usually the output of an RNN encoder.
    # This tensor should be shaped `[batch_size, max_time, ...]`
    encoder_time_step = time_step
    encoder_x_val = np.random.randn(encoder_time_step, input_size).astype('f')
    encoder_x_val = np.stack([encoder_x_val] * batch_size)
    encoder_x = tf.placeholder(tf.float32, encoder_x_val.shape, name="input_1")
    encoder_cell = rnn.LSTMBlockCell(size)
    output, attr_state = tf.nn.dynamic_rnn(encoder_cell, encoder_x, dtype=tf.float32)
    _ = tf.identity(output, name="output_0")
    attention_states = output
    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(attn_size,
                                                               attention_states)

    match_input_fn = lambda curr_input, state: tf.concat([curr_input, state], axis=-1)
    cell = rnn.LSTMBlockCell(size)
    match_cell_fw = tf.contrib.seq2seq.AttentionWrapper(
        cell,
        attention_mechanism,
        attention_layer_size=attn_size,
        cell_input_fn=match_input_fn,
        output_attention=False)

    decoder_time_step = 6
    decoder_x_val = np.random.randn(decoder_time_step, input_size).astype('f')
    decoder_x_val = np.stack([decoder_x_val] * batch_size)
    decoder_x = tf.placeholder(tf.float32, decoder_x_val.shape, name="input_2")
    output, attr_state = tf.nn.dynamic_rnn(match_cell_fw, decoder_x, dtype=tf.float32)
    _ = tf.identity(output, name="output")
    _ = tf.identity(attr_state.cell_state, name="final_state")

    feed_dict = {"input_1:0": encoder_x_val, "input_2:0": decoder_x_val}
    input_names_with_port = ["input_1:0", "input_2:0"]
    output_names_with_port = ["output_0:0", "output:0", "final_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port, 0.1)
def bidirectional_lstm(input_, cond, n_hidden=256, n_steps=32, n_input=54,
                       name='bidirec_lstm'):
    with tf.variable_scope(name):
        print('new_lstm discrim')
        # weights = tf.get_variable('weights', [4096, 1],
        #                           initializer=tf.random_normal_initializer(stddev=0.02))
        # biases = tf.get_variable('biases', [1], initializer=tf.constant_initializer(0.0))

        # Prepare data shape to match `bidirectional_rnn` function requirements
        # Current data input shape: (batch_size, n_steps, n_input)
        # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        input_x = tf.unstack(input_, n_steps, 1)
        # print(image.shape)
        # print('-----------------------------------x shape: ', x[0].get_shape())

        # Calculate shifts
        x = []
        for i in range(n_steps - 1):
            x.append(tf.concat([input_x[i], input_x[i + 1] - input_x[i], cond], 1))

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)
        # Backward direction cell
        lstm_bw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

        # Get lstm cell output
        try:
            outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell],
                                                        x, dtype=tf.float32)
        except Exception:
            # Old TensorFlow version only returns outputs not states
            outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell],
                                                  x, dtype=tf.float32)

        h = tf.concat(outputs, 1)
        h, h_w, h_b = linear(h, 1024, 'd_h3_lin', with_w=True)
        h = tf.nn.relu(h)
        h, h_w, h_b = linear(h, 1, 'd_h4_lin', with_w=True)
        return h
def bilstm_layer(self, data, keep_prob):
    x = tf.unstack(data, self.max_seq_len, 1)
    lstm_fw_cell = rnn.LSTMBlockCell(num_units=self.lstm_hidden_unit_size)
    lstm_fw_cell_dropout = rnn.DropoutWrapper(cell=lstm_fw_cell,
                                              input_keep_prob=keep_prob,
                                              output_keep_prob=keep_prob,
                                              state_keep_prob=keep_prob)
    lstm_bw_cell = rnn.LSTMBlockCell(num_units=self.lstm_hidden_unit_size)
    lstm_bw_cell_dropout = rnn.DropoutWrapper(cell=lstm_bw_cell,
                                              input_keep_prob=keep_prob,
                                              output_keep_prob=keep_prob,
                                              state_keep_prob=keep_prob)
    rnn_output, _, _ = rnn.static_bidirectional_rnn(
        lstm_fw_cell_dropout, lstm_bw_cell_dropout, x,
        sequence_length=self.seq_length_placeholder, dtype=tf.float32)
    return rnn_output
def test_single_dynamic_lstm_seq_length_is_not_const(self):
    units = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]],
                     dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)
    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    y_val = np.array([4, 3, 4, 5, 2, 1], dtype=np.int32)
    seq_length = tf.placeholder(tf.int32, y_val.shape, name="input_2")

    # no scope
    cell = rnn.LSTMBlockCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(
        cell, x, dtype=tf.float32, sequence_length=tf.identity(seq_length))
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val, "input_2:0": y_val}
    input_names_with_port = ["input_1:0", "input_2:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_lstm_count(g, 1))
def test_single_dynamic_lstm_with_cell_clip(self):
    units = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)
    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # no scope
    cell = rnn.LSTMBlockCell(units, cell_clip=0.05)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    input_names_with_port = ["input_1:0"]
    feed_dict = {"input_1:0": x_val}
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-03, atol=1e-06)
def test_single_dynamic_lstm_consume_one_of_ch_tuple(self):
    units = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)
    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # no scope
    cell = rnn.LSTMBlockCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state.c, name="cell_state_c")
    _ = tf.identity(cell_state.h, name="cell_state_h")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state_c:0", "cell_state_h:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_lstm_count(g, 1))
def test_single_dynamic_lstm_time_major(self):
    units = 5
    seq_len = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * seq_len)
    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # no scope
    cell = rnn.LSTMBlockCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, time_major=True, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    input_names_with_port = ["input_1:0"]
    feed_dict = {"input_1:0": x_val}
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_lstm_count(g, 1))
def create_lstm_cell(layer):
    if hyperparameters.layer_norm:
        if hyperparameters.num_proj:
            raise Exception(
                'No support for layer normalization together with projection layer.')
        cell = rnn.LayerNormBasicLSTMCell(
            hyperparameters.lstm_state_size,
            # here, we use the local variable dropout that is set to 0
            # if we are evaluating.
            dropout_keep_prob=1 - dropout,
            layer_norm=hyperparameters.layer_norm)
    else:
        if hyperparameters.num_proj:
            cell = rnn.LSTMCell(hyperparameters.lstm_state_size,
                                num_proj=hyperparameters.num_proj)
        else:
            cell = rnn.LSTMBlockCell(hyperparameters.lstm_state_size,
                                     forget_bias=0)
    if dropout > 0:
        cell = rnn.DropoutWrapper(cell, output_keep_prob=1 - dropout)
    return cell
def rnn_cell(rnn_cell_size, dropout_keep_prob):
    """Builds an LSTMBlockCell based on the given parameters."""
    cells = []
    for layer_size in rnn_cell_size:
        cell = rnn.LSTMBlockCell(layer_size)
        cell = rnn.DropoutWrapper(cell, input_keep_prob=dropout_keep_prob)
        cells.append(cell)
    return rnn.MultiRNNCell(cells)
def _get_lstm_cell(self, rnn_mode, hidden_size, is_training):
    if rnn_mode == BASIC:
        return tfrnn.LSTMCell(hidden_size, state_is_tuple=True,
                              reuse=not is_training)
    if rnn_mode == BLOCK:
        return tfrnn.LSTMBlockCell(hidden_size)
    raise ValueError('rnn mode {} not supported'.format(rnn_mode))
def test_multiple_dynamic_lstm(self):
    units = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)
    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    _ = tf.placeholder(tf.float32, x_val.shape, name="input_2")
    lstm_output_list = []
    lstm_cell_state_list = []

    # no scope
    cell = rnn.LSTMBlockCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    lstm_output_list.append(outputs)
    lstm_cell_state_list.append(cell_state)

    # given scope
    cell = rnn.LSTMBlockCell(units)
    with variable_scope.variable_scope("root1") as scope:
        outputs, cell_state = tf.nn.dynamic_rnn(
            cell, x, dtype=tf.float32,
            sequence_length=[4, 4, 4, 4, 4, 4], scope=scope)
    lstm_output_list.append(outputs)
    lstm_cell_state_list.append(cell_state)

    _ = tf.identity(lstm_output_list, name="output")
    _ = tf.identity(lstm_cell_state_list, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_lstm_count(g, 2))
def _get_lstm_cell(self, config, is_training):
    if config.rnn_mode == BASIC:
        # return tf.contrib.rnn.BasicLSTMCell(
        return rnn.BasicLSTMCell(config.hidden_size, forget_bias=0.0,
                                 state_is_tuple=True, reuse=not is_training)
    if config.rnn_mode == BLOCK:
        # return tf.contrib.rnn.LSTMBlockCell(
        return rnn.LSTMBlockCell(config.hidden_size, forget_bias=0.0)
    raise ValueError("rnn_mode %s not supported" % config.rnn_mode)
def rnn_cell(rnn_cell_size, dropout_keep_prob, residual, is_training=True):
    """Builds an LSTMBlockCell based on the given parameters."""
    dropout_keep_prob = dropout_keep_prob if is_training else 1.0
    cells = []
    for i in range(len(rnn_cell_size)):
        cell = rnn.LSTMBlockCell(rnn_cell_size[i])
        if residual:
            cell = rnn.ResidualWrapper(cell)
            if i == 0 or rnn_cell_size[i] != rnn_cell_size[i - 1]:
                cell = rnn.InputProjectionWrapper(cell, rnn_cell_size[i])
        cell = rnn.DropoutWrapper(cell, input_keep_prob=dropout_keep_prob)
        cells.append(cell)
    return rnn.MultiRNNCell(cells)
def RNN(x):
    # Define a lstm cell with tensorflow
    lstm_cell = rnn.LSTMBlockCell(num_hidden, forget_bias=1.0)

    # Get lstm cell output
    # outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    outputs, states = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=x,
                                        time_major=False, dtype=tf.float32)

    output_layer = tf.layers.Dense(
        num_classes, activation=None,
        kernel_initializer=tf.orthogonal_initializer())
    return output_layer(tf.layers.batch_normalization(outputs[:, -1, :]))
def RNN(x, weights, biases):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, n_steps, 1)

    # Define a lstm cell with tensorflow
    #lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    #lstm_cell = rnn.GRUCell(n_hidden)
    lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True,
                                  reuse=tf.get_variable_scope().reuse)
    #lstm_cell_bk = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
    lstm_cell_bk = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

    # make the deep rnn
    no_of_layers = 3  # layer number of drnn
    stacked_lstm = rnn.MultiRNNCell([
        #rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
        rnn.LSTMCell(n_hidden, use_peepholes=True, forget_bias=1.0,
                     state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
        for _ in range(no_of_layers)
    ])
    stacked_lstm_bk = rnn.MultiRNNCell([
        #rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
        rnn.LSTMCell(n_hidden, use_peepholes=True, forget_bias=1.0,
                     state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
        for _ in range(no_of_layers)
    ])

    # providing the dropout for rnn
    #lstm_cell = rnn.DropoutWrapper(lstm_cell, output_keep_prob=0.5)  # for rnn
    stacked_lstm = rnn.DropoutWrapper(stacked_lstm, output_keep_prob=0.5)  # for deep rnn

    # Get lstm cell output
    #outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)  # single layer rnn
    #outputs, states = rnn.static_rnn(stacked_lstm, x, dtype=tf.float32)  # deep rnn
    #outputs, states, states_bk = rnn.static_bidirectional_rnn(lstm_cell, lstm_cell_bk, x, dtype=tf.float32)  # single layer dirnn
    #outputs, states, states_bk = rnn.static_bidirectional_rnn(stacked_lstm, stacked_lstm_bk, x, dtype=tf.float32)  # deep dirnn
    outputs, states, states_bk = rnn.stack_bidirectional_rnn(
        [rnn.GRUCell(n_hidden) for _ in range(no_of_layers)],
        [rnn.GRUCell(n_hidden) for _ in range(no_of_layers)],
        x, dtype=tf.float32)  # deep dirnn

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def __init__(self, num_actions, observation_names, lstm_num_hiddens=256,
             feed_action_and_reward=True, max_reward=1.0,
             name="streetlearn_core"):
    """Initializes an agent core designed to be used with A3C/IMPALA.

    Supports a single visual observation tensor and outputs a single, scalar
    discrete action with policy logits and a baseline value.

    Args:
      num_actions: Number of actions available.
      observation_names: String with observation types separated by semi-colon.
      lstm_num_hiddens: Number of hiddens in the LSTM core.
      feed_action_and_reward: If True, the last action (one hot) and last
        reward (scalar) will be concatenated to the torso.
      max_reward: If `feed_action_and_reward` is True, the last reward will be
        clipped to `[-max_reward, max_reward]`. If `max_reward` is None, no
        clipping will be applied. N.B., this is different from reward clipping
        during gradient descent, or reward clipping by the environment.
      name: Optional name for the module.
    """
    super(PlainAgent, self).__init__(name='agent')

    # Policy config
    self._num_actions = num_actions
    tf.logging.info('Agent trained on %d-action policy', self._num_actions)
    # Append last reward (clipped) and last action?
    self._feed_action_and_reward = feed_action_and_reward
    self._max_reward = max_reward
    # Policy LSTM core config
    self._lstm_num_hiddens = lstm_num_hiddens

    # Extract the observation names
    observation_names = observation_names.split(';')
    self._idx_frame = observation_names.index('view_image')

    with self._enter_variable_scope():
        tf.logging.info('LSTM core with %d hiddens', self._lstm_num_hiddens)
        self._core = contrib_rnn.LSTMBlockCell(self._lstm_num_hiddens)
def build_graph(self, input, nextinput):
    # embedding
    embeddingW = tf.get_variable('embedding', [self.vocab_size, self.num_rnn_unit])
    input_feature = tf.nn.embedding_lookup(embeddingW, input)
    input_list = tf.unstack(input_feature, axis=1)

    # rnn
    cell = rnn.MultiRNNCell([
        rnn.LSTMBlockCell(num_units=self.num_rnn_unit)
        for _ in range(self.num_rnn_layer)
    ])

    def get_v(n):
        ret = tf.get_variable(n + '_unused', [BATCH_SIZE, self.num_rnn_unit],
                              trainable=False,
                              initializer=tf.constant_initializer())
        ret = tf.placeholder_with_default(ret,
                                          shape=[None, self.num_rnn_unit],
                                          name=n)
        return ret

    initial_state = [
        rnn.LSTMStateTuple(get_v('c{}'.format(i)), get_v('h{}'.format(i)))
        for i in range(self.num_rnn_layer)
    ]
    outputs, last_state = rnn.static_rnn(cell, input_list, initial_state)
    last_state = tf.identity(last_state, 'last_state')

    # FC
    output = tf.reshape(tf.concat(outputs, 1), [-1, self.num_rnn_unit])
    logits = FullyConnected('fc', output, self.vocab_size)
    tf.nn.softmax(logits, name='prob')

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.reshape(nextinput, [-1]))
    cost = tf.reduce_mean(loss, name='cost')
    summary.add_moving_summary(cost)
    return cost
def BuildLSTM(self, name, input_x, initstate, weights, biases, reuse):
    with tf.variable_scope("LSTM") as scope:
        if (reuse):
            tf.get_variable_scope().reuse_variables()

        # processing the input tensor from [batch_size,n_steps,n_input] to
        # "time_steps" number of [batch_size,n_input] tensors
        #input_x = tf.unstack(input_x, self.num_steps, axis=1)

        cells = []
        for _ in range(self.num_layers):
            cell = rnn.LSTMBlockCell(self.hidden_size, forget_bias=1)
            cell = tf.contrib.rnn.DropoutWrapper(cell,
                                                 output_keep_prob=1.0 - self.dropout)
            cells.append(cell)
        cell = tf.contrib.rnn.MultiRNNCell(cells)  # RNN cell composed sequentially of multiple simple cells.

        outputs, states = tf.nn.dynamic_rnn(cell, input_x,
                                            dtype="float32")  # dynamic unrolling of inputs
        reuse = True
        tf.get_variable_scope().reuse_variables()
        return outputs
def test_single_dynamic_lstm_placeholder_input(self):
    units = 5
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * 6)
    x = tf.placeholder(tf.float32, shape=(None, 4, 2), name="input_1")

    # no scope
    cell = rnn.LSTMBlockCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(
        cell, x, dtype=tf.float32)  # by default zero initializer is used
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_lstm_count(g, 1))
def RNN_BLOCK_LSTM(X):
    with tf.variable_scope('RNN'):
        lstm_cell = rnn.LSTMBlockCell(num_hidden, forget_bias=1.0)
        # lstm_cell = rnn.DropoutWrapper(
        #     rnn.LSTMBlockCell(num_hidden, forget_bias=1.0),
        #     input_keep_prob=0.5,
        #     output_keep_prob=0.5,
        #     state_keep_prob=0.5,
        #     dtype=tf.float32
        # )
        outputs, state = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=X, dtype=tf.float32)
        batch_norm = tf.layers.batch_normalization(outputs[:, -1, :])
        logits = tf.layers.dense(inputs=batch_norm,
                                 units=num_classes,
                                 activation=None,
                                 kernel_initializer=tf.orthogonal_initializer())
        return logits
def _get_rnn_cell(self):
    with tf.variable_scope('rnn_cell'):
        lstm_cell = rnn.LSTMBlockCell(self.hidden_size, forget_bias=1.0)
        return rnn.DropoutWrapper(cell=lstm_cell, input_keep_prob=self._keep_prob)
if count > 0:
    print("%s %sdoes NOT match%s, error count = %d (out of %d) AVG=%g ABSAVG=%g" %
          (buf, RED + BOLD, ENDC, count, size, avg_ref, avg_abs_ref_orig))
else:
    print("%s %sDOES match%s, size = %d AVG=%g ABSAVG=%g" %
          (buf, GREEN + BOLD, ENDC, size, avg_ref, avg_abs_ref_orig))

N = 64
C = 128
K = 192
T = 10
forget_bias = 1.0
tf.set_random_seed(1)
#x = tf.constant(-0.1, shape=[N,C], dtype = tf.float32)
#x2 = tf.constant(0.1, shape=[N,C], dtype = tf.float32)
x = tf.random_normal(shape=[N, C], dtype=tf.float32)  #+ 0.5
x2 = tf.random_normal(shape=[N, C], dtype=tf.float32)  #+ 0.5
lstm_cell_ref = rnn.LSTMBlockCell(K, forget_bias=forget_bias, name='test')
#lstm_cell_ref = rnn.BasicLSTMCell(K, forget_bias=forget_bias, name='test')
#lstm_cell = rnn.LSTMBlockCell(K, forget_bias=forget_bias, name='test', reuse=True)
lstm_cell = xsmm_lstm.XsmmLSTMCell(K, forget_bias=forget_bias, name='test', reuse=True)
init_state = lstm_cell_ref.zero_state(N, dtype=tf.float32)
x_fused = tf.convert_to_tensor([x] + [x2 for _ in range(T - 1)])
print("x_fused is: %s" % x_fused)
outputs_ref, states_ref = tf.nn.dynamic_rnn(lstm_cell_ref, x_fused, dtype=tf.float32,
                                            initial_state=init_state, time_major=True)
outputs, states = tf.nn.dynamic_rnn(lstm_cell, x_fused, dtype=tf.float32,
                                    initial_state=init_state, time_major=True)
init = tf.global_variables_initializer()
W = tf.global_variables()[0]
B = tf.global_variables()[1]
g_ref = tf.gradients(outputs_ref, [x_fused] + [W, B, init_state.c, init_state.h])
g = tf.gradients(outputs, [x_fused] + [W, B, init_state.c, init_state.h])
def __init__(self, nqc, value_encodings, relation_encodings, num_gpus=1,
             encoder=None):
    """Builds a simple, fully-connected model to predict the outcome set given a query string.

    Args:
      nqc: NeuralQueryContext
      value_encodings: (bert features for values, length of value span)
      relation_encodings: (bert features for relations, length of relation span)
      num_gpus: number of gpus for distributed computation
      encoder: encoder (layers.RNN) for parameter sharing between train and dev

    Needs:
      self.input_ph: input to encoder (either one-hot or BERT layers)
      self.mask_ph: mask for the input
      self.correct_set_ph.name: target labels (if loss or accuracy is computed)
      self.prior_start: sparse matrix for string similarity features
      self.is_training: whether the model is training (for dropout)

    Exposes:
      self.loss: objective for loss
      self.accuracy: mean accuracy metric (P_{predicted}(gold))
      self.accuracy_per_ex: detailed per example accuracy
      self.log_nql_pred_set: predicted entity set (in nql)
      self.log_decoded_relations: predicted relations (as indices)
      self.log_start_values: predicted start values (in nql)
      self.log_start_cmps: components of predicted start values (in nql)
    """
    # Encodings should have the same dimensions
    assert value_encodings[0].shape[-1] == relation_encodings[0].shape[-1]
    self.context = nqc
    self.input_ph = tf.placeholder(
        tf.float32,
        shape=(None, FLAGS.max_query_length, value_encodings[0].shape[-1]),
        name="oh_seq_ph")
    self.mask_ph = tf.placeholder(
        tf.float32, shape=(None, FLAGS.max_query_length), name="oh_mask_ph")
    self.debug = None
    layer_size = FLAGS.layer_size
    num_layers = FLAGS.num_layers
    max_properties = FLAGS.max_properties
    logits_strategy = FLAGS.logits
    dropout_rate = FLAGS.dropout
    inferred_batch_size = tf.shape(self.input_ph)[0]
    self.is_training = tf.placeholder(tf.bool, shape=[])
    value_tensor = util.reshape_to_tensor(value_encodings[0], value_encodings[1])
    relation_tensor = util.reshape_to_tensor(relation_encodings[0],
                                             relation_encodings[1])
    # The last state of LSTM encoder is the representation of the input string
    with tf.variable_scope("model"):
        # Build all the model parts:
        #   encoder: LSTM encoder
        #   prior: string features
        #   {value, relation}_similarity: learned embedding similarity
        #   decoder: LSTM decoder
        #   value_model: map from encoder to key for attention
        #   attention: Luong (dot product) attention

        # Builds encoder - note that this is in keras
        self.encoder = self._build_encoder(encoder, layer_size, num_layers)
        # Build module to turn prior (string features) into logits
        self.prior_start = tf.sparse.placeholder(
            tf.float32,
            name="prior_start_ph",
            shape=[inferred_batch_size, value_tensor.shape[1]])
        with tf.variable_scope("prior"):
            prior = Prior()
        # Build similarity module - biaffine qAr
        with tf.variable_scope("value_similarity"):
            value_similarity = Similarity(layer_size, value_tensor, num_gpus)
        # Build relation decoder
        with tf.variable_scope("relation_decoder"):
            rel_dec_rnn_layers = [
                contrib_rnn.LSTMBlockCell(layer_size, name=("attr_lstm_%d" % i))
                for (i, layer_size) in enumerate([layer_size] * num_layers)
            ]
            relation_decoder_cell = tf.nn.rnn_cell.MultiRNNCell(rel_dec_rnn_layers)
            tf.logging.info("relation decoder lstm has state of size: {}".format(
                relation_decoder_cell.state_size))
        # Build similarity module - biaffine qAr
        with tf.variable_scope("relation_similarity"):
            relation_similarity = Similarity(layer_size, relation_tensor, 1)
        with tf.variable_scope("attention"):
            attention = layers.Attention()
        value_model = tf.get_variable(
            "value_transform",
            shape=[layer_size, relation_decoder_cell.output_size],
            trainable=True)

        # Initialization for logging, variables shouldn't be used elsewhere
        log_decoded_starts = []
        log_start_logits = []
        log_decoded_relations = []
        # Initialization to prepare before first iteration of loop
        prior_logits_0 = prior.compute_logits(tf.sparse.to_dense(self.prior_start))
        cumulative_entities = nqc.all("id_t")
        relation_decoder_out = tf.zeros([inferred_batch_size, layer_size])
        encoder_output = self.encoder(self.input_ph, mask=self.mask_ph)
        query_encoder_out = encoder_output[0]
        relation_decoder_state = encoder_output[1:]
        # Initialization for property loss, equal to log vars but separating
        value_dist = []
        relation_dist = []
        for i in range(max_properties):
            prior_logits = tf.layers.dropout(prior_logits_0,
                                             rate=dropout_rate,
                                             training=self.is_training)
            # Use the last state to determine key; more stable than last output
            query_key = tf.nn.relu(
                tf.matmul(
                    tf.expand_dims(relation_decoder_state[-1][-1], axis=1),
                    value_model))
            query_emb = tf.squeeze(
                attention([query_key, query_encoder_out],
                          mask=[None, tf.cast(self.mask_ph, tf.bool)]),
                axis=1)
            similarity_logits = value_similarity.compute_logits(query_emb)
            if logits_strategy == "prior":
                total_logits = prior_logits
            elif logits_strategy == "sim":
                total_logits = similarity_logits
            elif logits_strategy == "mixed":
                total_logits = prior_logits + similarity_logits
            total_dist = contrib_layers.softmax(total_logits)
            values_pred = nqc.as_nql(total_dist, "val_g")
            with tf.variable_scope("start_follow_{}".format(i)):
                start_pred = nqc.all("v_t").follow(values_pred)  # find starting nodes
            # Given the previous set of attributes, where are we going?
            (relation_decoder_out, relation_decoder_state) = relation_decoder_cell(
                relation_decoder_out, relation_decoder_state)
            pred_relation = tf.nn.softmax(
                relation_similarity.compute_logits(relation_decoder_out))
            if FLAGS.enforce_type:
                if i == 0:
                    is_adjust = nqc.as_tf(nqc.one(IS_A, "rel_g"))
                else:
                    is_adjust = 1 - nqc.as_tf(nqc.one(IS_A, "rel_g"))
                pred_relation = pred_relation * is_adjust
            nql_pred_relation = nqc.as_nql(pred_relation, "rel_g")
            # Conjunctive (& start.follow() & start.follow()...).
            with tf.variable_scope("relation_follow_{}".format(i)):
                current_entities = start_pred.follow(nql_pred_relation)
                cumulative_entities = cumulative_entities & current_entities
            # For property loss and regularization
            value_dist.append(total_dist)
            relation_dist.append(pred_relation)
            # Store predictions for logging
            log_decoded_starts.append(start_pred)
            log_decoded_relations.append(pred_relation)
            log_start_logits.append([prior_logits, similarity_logits])

        (loss, pred_set_tf, pred_set_tf_norm) = self._compute_loss(cumulative_entities)
        property_loss = self._compute_property_loss(value_dist, relation_dist)
        (accuracy_per_ex, accuracy) = self._compute_accuracy(cumulative_entities,
                                                             pred_set_tf)
        value_loss = self._compute_distribution_regularizer(value_dist)
        relation_loss = self._compute_distribution_regularizer(relation_dist)
        self.regularization = FLAGS.time_reg * (value_loss + relation_loss)
        self.loss = loss - self.regularization
        self.property_loss = property_loss
        self.accuracy_per_ex = accuracy_per_ex
        self.accuracy = accuracy

        # Debugging/logging information
        log_decoded_relations = tf.transpose(tf.stack(log_decoded_relations),
                                             [1, 0, 2])
        tf.logging.info("decoded relations has shape: {}".format(
            log_decoded_relations.shape))
        self.log_start_values = log_decoded_starts
        self.log_start_cmps = [[nqc.as_nql(logits, "val_g") for logits in comp]
                               for comp in log_start_logits]
        self.log_decoded_relations = tf.nn.top_k(log_decoded_relations, k=5)
        self.log_nql_pred_set = nqc.as_nql(pred_set_tf_norm, "id_t")
def lstm_fw():
    cell_fw = rnn.LSTMBlockCell(self.hidden_size)
    cell_fw = rnn.DropoutWrapper(cell_fw, output_keep_prob=self.keep_prob)
    return cell_fw