def __init__(self, num_units, use_peepholes=False, cell_clip=None,
             initializer=None, num_proj=None, num_unit_shards=1,
             num_proj_shards=1, forget_bias=1.0, bn=0, return_gate=False,
             deterministic=None, activation=tanh):
    """Initialize the parameters for an LSTM cell.

    This constructor only records the settings on the instance and derives
    the state/output sizes; nothing else is built here.

    Args:
      num_units: int, the number of units in the LSTM cell.
      use_peepholes: bool, set True to enable diagonal/peephole connections.
      cell_clip: (optional) A float value, if provided the cell state is
        clipped by this value prior to the cell output activation.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      num_proj: (optional) int, the output dimensionality for the projection
        matrices. If None (or any falsy value), no projection is performed.
      num_unit_shards: How to split the weight matrix. If >1, the weight
        matrix is stored across num_unit_shards.
      num_proj_shards: How to split the projection matrix. If >1, the
        projection matrix is stored across num_proj_shards.
      forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of the
        training.
      bn: int, set 1, 2 or 3 to enable sequence-wise batch normalization
        with different level. Implemented according to arXiv:1603.09025.
      return_gate: bool, set True to return the values of the gates.
      deterministic: Tensor, control training and testing phase, decide
        whether to open batch normalization.
      activation: Activation function of the inner states.
    """
    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._num_proj = num_proj
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._activation = activation
    self._bn = bn
    self._return_gate = return_gate
    self._deterministic = deterministic

    # With a projection the second state field and the cell output have
    # size num_proj; otherwise both fall back to num_units.
    if num_proj:
        self._state_size = LSTMStateTuple(num_units, num_proj)
        self._output_size = num_proj
    else:
        self._state_size = LSTMStateTuple(num_units, num_units)
        self._output_size = num_units
def build_decoder_cell(rank, u_emb, batch_size, depth=2):
    """Build a stacked LSTM decoder cell together with its initial state.

    Layer 0 is a plain ``LSTMCell(rank)``; every further layer is an
    ``LSTMCell(rank)`` wrapped in ``ResidualWrapper``.

    Args:
      rank: number of units passed to every ``LSTMCell``.
      u_emb: tensor used as the second field of the first layer's initial
        ``LSTMStateTuple``; the first field is ``tf.zeros_like(u_emb)``.
      batch_size: batch size handed to ``zero_state`` for layers 1 and up.
      depth: number of stacked layers.

    Returns:
      A pair ``(MultiRNNCell(layers), tuple_of_initial_states)``.
    """
    layers = []
    for layer_idx in range(depth):
        base_cell = LSTMCell(rank, state_is_tuple=True)
        # Only the layers above the first one get a residual connection.
        layers.append(base_cell if layer_idx == 0 else ResidualWrapper(base_cell))

    first_state = LSTMStateTuple(tf.zeros_like(u_emb), u_emb)
    upper_states = [
        layer.zero_state(batch_size, tf.float32) for layer in layers[1:]
    ]
    return MultiRNNCell(layers), tuple([first_state] + upper_states)
def __call__(self, x, state, scope=None):
    """Run one step of the batch-normalised LSTM.

    The input and recurrent projections are computed and batch-normalised
    separately (scopes 'xh' and 'hh'), summed with a bias and split into the
    four gate pre-activations. The new cell state is batch-normalised
    (scope 'c') before the output activation.

    Args:
      x: input tensor; its second static dimension sizes ``W_xh``.
      state: pair ``(c, h)`` of previous cell and hidden state.
      scope: optional variable scope; defaults to the class name.

    Returns:
      ``(new_h, LSTMStateTuple(new_c, new_h))``. Note that the stored cell
      state is the un-normalised ``new_c``.
    """
    with tf.variable_scope(scope or type(self).__name__):
        prev_c, prev_h = state
        units = self._num_units

        input_dim = x.get_shape().as_list()[1]
        W_xh = tf.get_variable(
            'W_xh', [input_dim, 4 * units],
            initializer=orthogonal_initializer())
        W_hh = tf.get_variable(
            'W_hh', [units, 4 * units],
            initializer=bn_lstm_identity_initializer(0.95))
        bias = tf.get_variable('bias', [4 * units])

        input_proj = tf.matmul(x, W_xh)
        recurrent_proj = tf.matmul(prev_h, W_hh)

        normed_input = batch_norm(input_proj, 'xh', self.training)
        normed_recurrent = batch_norm(recurrent_proj, 'hh', self.training)

        pre_activations = normed_input + normed_recurrent + bias

        # i = input gate, j = candidate, f = forget gate, o = output gate
        i, j, f, o = tf.split(1, 4, pre_activations)

        retained = prev_c * tf.sigmoid(f)
        written = tf.sigmoid(i) * self._activation(j)
        new_c = retained + written
        normed_c = batch_norm(new_c, 'c', self.training)

        new_h = self._activation(normed_c) * tf.sigmoid(o)
        return new_h, LSTMStateTuple(new_c, new_h)
def __call__(self, x, state, scope=None):
    """Run one LSTM step using a single fused matrix multiply.

    Args:
      x: input tensor; its second static dimension sizes ``W_xh``.
      state: pair ``(c, h)`` of previous cell and hidden state.
      scope: optional variable scope; defaults to the class name.

    Returns:
      ``(new_h, LSTMStateTuple(new_c, new_h))``.
    """
    with tf.variable_scope(scope or type(self).__name__):
        prev_c, prev_h = state
        units = self.num_units

        # The two weight matrices stay separate variables so that each can
        # keep its own initialisation scheme.
        input_dim = x.get_shape().as_list()[1]
        W_xh = tf.get_variable(
            'W_xh', [input_dim, 4 * units],
            initializer=orthogonal_initializer())
        W_hh = tf.get_variable(
            'W_hh', [units, 4 * units],
            initializer=bn_lstm_identity_initializer(0.95))
        bias = tf.get_variable('bias', [4 * units])

        # Equivalent to tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias, but
        # done as one matmul over concatenated operands for speed.
        stacked_inputs = tf.concat(1, [x, prev_h])
        stacked_weights = tf.concat(0, [W_xh, W_hh])
        pre_activations = tf.matmul(stacked_inputs, stacked_weights) + bias

        # i = input gate, j = candidate, f = forget gate, o = output gate
        i, j, f, o = tf.split(1, 4, pre_activations)

        retained = prev_c * tf.sigmoid(f)
        written = tf.sigmoid(i) * tf.tanh(j)
        new_c = retained + written
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        return new_h, LSTMStateTuple(new_c, new_h)
def call(self, inputs, state):
    """Long short-term memory cell (LSTM).

    Args:
      inputs: input tensor for this step.
      state: ``(c, h)`` pair when ``self._state_is_tuple`` is set, otherwise
        a single tensor holding both halves side by side on axis 1.

    Returns:
      ``(new_h, new_state)`` where ``new_state`` uses the same packing as
      the incoming ``state``.
    """
    sigmoid = math_ops.sigmoid
    tuple_state = self._state_is_tuple

    if tuple_state:
        prev_c, prev_h = state
    else:
        prev_c, prev_h = array_ops.split(
            value=state, num_or_size_splits=2, axis=1)

    # All four gates come out of one projection, produced by the class's
    # own `_line_sep` helper (called here with bias disabled).
    gate_block = self._line_sep(
        [inputs, prev_h], 4 * self._num_units, bias=False)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=gate_block, num_or_size_splits=4, axis=1)

    retained = prev_c * sigmoid(f + self._forget_bias)
    written = sigmoid(i) * self._activation(j)
    new_c = retained + written
    new_h = self._activation(new_c) * sigmoid(o)

    if tuple_state:
        return new_h, LSTMStateTuple(new_c, new_h)
    return new_h, array_ops.concat([new_c, new_h], 1)
def __call__(self, inputs, state, scope=None):
    """Convolutional Long short-term memory cell (ConvLSTM).

    Args:
      inputs: input tensor for this step.
      state: ``(c, h)`` pair when ``self._state_is_tuple`` is set, otherwise
        one tensor with both halves stacked on axis 3.
      scope: optional variable scope; defaults to the class name.

    Returns:
      ``(new_h, new_state)`` where ``new_state`` uses the same packing as
      the incoming ``state``.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "ConvLSTMCell"
        tuple_state = self._state_is_tuple
        if tuple_state:
            prev_c, prev_h = state
        else:
            prev_c, prev_h = array_ops.split(3, 2, state)

        # batch_size * height * width * channel
        gate_maps = _conv(
            [inputs, prev_h], 4 * self._num_units, self._k_size, True,
            initializer=self._initializer)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(3, 4, gate_maps)

        retained = prev_c * sigmoid(f + self._forget_bias)
        written = sigmoid(i) * self._activation(j)
        new_c = retained + written
        new_h = self._activation(new_c) * sigmoid(o)

        if tuple_state:
            packed_state = LSTMStateTuple(new_c, new_h)
        else:
            packed_state = array_ops.concat(3, [new_c, new_h])
        return new_h, packed_state
def call(self, inputs, state):
    """LSTM cell with optional layer normalization.

    When ``self._layer_norm`` is set, each gate pre-activation and the new
    cell state are passed through ``self._norm`` under the scope names
    "input", "transform", "forget", "output" and "state". Recurrent dropout
    on the candidate is not applied in this implementation.

    Args:
      inputs: input tensor for this step.
      state: ``(c, h)`` pair of previous cell and hidden state.

    Returns:
      ``(new_h, LSTMStateTuple(new_c, new_h))``.
    """
    prev_c, prev_h = state
    use_norm = self._layer_norm

    gate_block = self._linear([inputs, prev_h], self._num_units * 4, False)
    i, j, f, o = array_ops.split(
        value=gate_block, num_or_size_splits=4, axis=1)

    if use_norm:
        i = self._norm(i, "input")
        j = self._norm(j, "transform")
        f = self._norm(f, "forget")
        o = self._norm(o, "output")

    candidate = self._activation(j)

    retained = prev_c * math_ops.sigmoid(f + self._forget_bias)
    written = math_ops.sigmoid(i) * candidate
    new_c = retained + written
    if use_norm:
        new_c = self._norm(new_c, "state")

    new_h = self._activation(new_c) * math_ops.sigmoid(o)
    return new_h, LSTMStateTuple(new_c, new_h)
def let_compute():
    """Attend over cached controller hidden values and fold the result into the state.

    Closure over names from the enclosing scope (`self`, `var`,
    `controller_state`, `cache_controller_hidden`, `time`, `time2`,
    `last_read_vectors`). Always sets ``var["compute_interface"] = True``.

    When ``self.cache_attend_dim > 0`` an attention read over the gathered
    cache entries is computed and, depending on the `statetype` reported by
    ``self.get_hidden_value_from_state``, either written into a copy of the
    controller state (types 1, 2, 3) or returned directly (type 4).
    Otherwise the incoming ``controller_state`` is returned unchanged.

    NOTE(review): the meaning of each `statetype` value is defined by
    `get_hidden_value_from_state`, which is not visible here — confirm.
    """
    var["compute_interface"] = True
    ns2 = controller_state
    if self.cache_attend_dim > 0:
        # values = utility.pack_into_tensor(cache_controller_hidden, axis=1)
        # Gather the cached entries for indices time - time2 .. time (inclusive).
        values = cache_controller_hidden.gather(
            tf.range(time - time2, time + 1))
        value_size = self.hidden_controller_dim
        encoder_outputs = \
            tf.reshape(values, [self.batch_size, -1, value_size])  # bs x Lin x h
        # Project every cached value into the attention space.
        v = tf.reshape(
            tf.matmul(tf.reshape(encoder_outputs, [-1, value_size]), self.cU_a),
            [self.batch_size, -1, self.cache_attend_dim])
        if self.use_mem:
            # Add a term computed from the last memory read vectors; the
            # size-1 middle axis broadcasts over the cached positions.
            v += tf.reshape(
                tf.matmul(
                    tf.reshape(last_read_vectors, [-1, self.read_heads * self.word_size]),
                    self.cV_a),
                [self.batch_size, 1, self.cache_attend_dim])
        ns, statetype = self.get_hidden_value_from_state(
            controller_state)
        # NOTE(review): the print calls below look like leftover debugging
        # output; they run at graph-construction time.
        print("state typeppppp")
        print(controller_state)
        print(ns)
        # Add a term computed from the current hidden value, also broadcast.
        v += tf.reshape(
            tf.matmul(tf.reshape(ns, [-1, self.hidden_controller_dim]), self.cW_a),
            [self.batch_size, 1, self.cache_attend_dim
             ])  # bs.Lin x h_att
        print('state include only h')
        v = tf.reshape(tf.tanh(v), [-1, self.cache_attend_dim])
        # Score every cached position and normalise the scores per batch row.
        eijs = tf.matmul(v, tf.expand_dims(self.cv_a, 1))  # bs.Lin x 1
        eijs = tf.reshape(eijs, [self.batch_size, -1])  # bs x Lin
        alphas = tf.nn.softmax(eijs)
        # Attention-weighted sum of the cached values.
        att = tf.reduce_sum(encoder_outputs * tf.expand_dims(alphas, 2), 1)  # bs x h x 1
        att = tf.reshape(att, [self.batch_size, value_size])  # bs x h
        # step = tf.concat([var["step"], att], axis=-1)  # bs x (encoder_input_size + h)
        # step = tf.matmul(step, self.cW_ah)  # bs x encoder_input_size (or emb_size)
        if statetype == 1:
            ns2 = list(controller_state)
            # NOTE(review): item assignment requires ns2[-1] to be mutable;
            # this would raise if it is a tuple/LSTMStateTuple — confirm the
            # state structure for statetype 1. It also mutates the object
            # shared with controller_state.
            ns2[-1][-1] = att
            ns2 = tuple(ns2)
        elif statetype == 2 or statetype == 3:
            # ns2 = list(controller_state)
            # Keep the first state field and replace the second with att.
            ns2 = LSTMStateTuple(controller_state[0], att)
            # ns2 = tuple(ns2)
        elif statetype == 4:
            # Here the attention result itself is returned.
            return att
    return ns2
def c2():
    """Run controller3 on the merged state of both controllers.

    The two controller states are concatenated field-wise on the last axis,
    the two read vectors on axis 1, and the result is fed to
    ``self.controller3.process_input``. The returned state is then split
    back into two equal halves ("split then group").

    Returns:
      ``(pre_output, interface, (left_state, right_state))`` where both
      states are ``LSTMStateTuple`` instances.
    """
    first_c, first_h = controller_state1[0], controller_state1[1]
    second_c, second_h = controller_state2[0], controller_state2[1]

    merged_c = tf.concat([first_c, second_c], axis=-1)
    merged_h = tf.concat([first_h, second_h], axis=-1)
    merged_state = LSTMStateTuple(merged_c, merged_h)

    merged_reads = tf.concat([last_read_vectors1, last_read_vectors2], axis=1)

    pre_output, interface, joint_state = self.controller3.process_input(
        step1, merged_reads, merged_state)

    # Undo the concatenation: first half -> left, second half -> right.
    left_c, right_c = tf.split(joint_state[0], num_or_size_splits=2, axis=-1)
    left_h, right_h = tf.split(joint_state[1], num_or_size_splits=2, axis=-1)

    split_states = (LSTMStateTuple(left_c, left_h),
                    LSTMStateTuple(right_c, right_h))
    return pre_output, interface, split_states
def build_graph(self):
    """
    Build the computational graph that evaluates the input batches one
    time step at a time.

    The input is unstacked into a TensorArray, a ``tf.while_loop`` drives
    ``self._loop_body`` for ``self.sequence_length`` steps, and the collected
    per-step arrays are stacked back into ``self.stacked_output`` and
    ``self.stacked_memory_view``. If the controller is recurrent, its state
    update is attached as a control dependency of those stacked results.
    """
    self.unstacked_input_data = utility.unstack_into_tensorarray(
        self.input_data, 1, self.sequence_length)

    def _new_array():
        # One float32 slot per time step.
        return tf.TensorArray(tf.float32, self.sequence_length)

    outputs = _new_array()
    free_gates = _new_array()
    allocation_gates = _new_array()
    write_gates = _new_array()
    read_weightings = _new_array()
    write_weightings = _new_array()
    usage_vectors = _new_array()

    if self.controller.has_recurrent_nn:
        controller_state = self.controller.get_state()
    else:
        # Placeholder state so the loop variables keep a fixed structure.
        controller_state = (tf.zeros(1), tf.zeros(1))
    memory_state = self.memory.init_memory()
    if not isinstance(controller_state, LSTMStateTuple):
        controller_state = LSTMStateTuple(controller_state[0],
                                          controller_state[1])

    with tf.compat.v1.variable_scope("sequence_loop"):
        time = tf.constant(0, dtype=tf.int32)
        loop_vars = (time, memory_state, outputs, free_gates,
                     allocation_gates, write_gates, read_weightings,
                     write_weightings, usage_vectors, controller_state)
        final_results = tf.while_loop(
            cond=lambda time, *_: time < self.sequence_length,
            body=self._loop_body,
            loop_vars=loop_vars,
            parallel_iterations=32,
            swap_memory=True)

    dependencies = []
    if self.controller.has_recurrent_nn:
        dependencies.append(self.controller.update_state(final_results[9]))

    # Positions 3..8 of the loop result, in this order.
    view_names = ('free_gates', 'allocation_gates', 'write_gates',
                  'read_weightings', 'write_weightings', 'usage_vectors')

    with tf.control_dependencies(dependencies):
        self.stacked_output = utility.stack_into_tensor(final_results[2],
                                                        axis=1)
        self.stacked_memory_view = {
            name: utility.stack_into_tensor(final_results[position], axis=1)
            for position, name in enumerate(view_names, 3)
        }
def Planner(self, training_input, testing_input, label_status, length, mask):
    """Build the status-planner RNN for training and single-step testing.

    Both paths share one RNN cell and one 2-class output layer
    (`w_status`, `b_status`) created under the 'planner' variable scope.

    Args:
      training_input: training features, reshaped here to
        ``[batch_size, self.max_step, feature_dim]`` (original comment:
        ``b*l, h``).
      testing_input: input for one direct call of the RNN cell.
      label_status: integer class labels; flattened before the loss.
      length: per-sequence lengths passed to ``tf.nn.dynamic_rnn``.
      mask: multiplied element-wise with the per-step loss before summing.

    Returns:
      ``(loss_status_scalar, prob, state, prev_state)``: the masked summed
      cross-entropy, the test-time class probabilities, the RNN state after
      the test step, and the placeholder(s) feeding the test-time initial
      state (a single ``LSTMStateTuple`` when ``self.n_layers == 1``, else a
      list with one per layer).
    """
    with tf.variable_scope('planner'):
        # Floor division keeps the per-GPU batch size an int, which
        # tf.reshape needs, even under true division.
        batch_size = self.batch_size // self.gpu_num
        rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)
        w_status = tf.get_variable(
            'w_status', [self.n_hidden, 2],
            initializer=tf.contrib.layers.xavier_initializer())
        b_status = tf.get_variable(
            'b_status', [2],
            initializer=tf.contrib.layers.xavier_initializer())

        # training
        training_input_dropout = tf.nn.dropout(training_input,
                                               self.keep_prob)  # b*l, h
        shape = training_input_dropout.get_shape().as_list()
        training_input_reshape = tf.reshape(
            training_input_dropout,
            [batch_size, self.max_step, shape[1]])  # b, l, h
        rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                          training_input_reshape,
                                          sequence_length=length,
                                          dtype=tf.float32)  # b, l, h
        rnn_output_dropout = tf.nn.dropout(rnn_output, self.keep_prob)
        rnn_output_reshape = tf.reshape(rnn_output_dropout,
                                        [-1, self.n_hidden])  # b*l, h
        logits = tf.reshape(tf.matmul(rnn_output_reshape, w_status),
                            [-1, 2]) + b_status  # b*l, n
        label_status_reshape = tf.reshape(label_status, [-1])
        loss_status = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_status_reshape, logits=logits)
        loss_status_scalar = tf.reduce_sum(loss_status * mask)

        # testing
        # One (c, h) placeholder pair per layer so the caller can feed the
        # previous step's state back in.
        prev_state = []
        for layer in range(self.n_layers):
            prev_state.append(
                LSTMStateTuple(
                    tf.placeholder(tf.float32,
                                   shape=[None, self.n_hidden],
                                   name='initial_state{0}.c'.format(layer)),
                    tf.placeholder(tf.float32,
                                   shape=[None, self.n_hidden],
                                   name='initial_state{0}.h'.format(layer))))
        if self.n_layers == 1:
            prev_state = prev_state[0]
        rnn_output_test, state = rnn_cell(testing_input,
                                          prev_state)  # b*l, h
        prob = tf.reshape(
            tf.nn.softmax(tf.matmul(rnn_output_test, w_status) + b_status),
            [-1, 2])
        # pred_status_test = tf.argmax(prob, axis=1)
        return loss_status_scalar, prob, state, prev_state
def call(self, inputs, state):
    """Run one step of the (optionally highway-connected) LSTM cell.

    The hidden state and the input are projected separately. With
    ``self.highway`` set, the hidden projection yields 5 blocks and the
    input projection 6: the extra ones drive a transform gate ``t`` and a
    linear carry ``hx`` of the input, mixed as ``t * m + (1 - t) * hx``.

    Args:
      inputs: rank-2 input tensor whose second dimension must be statically
        known.
      state: pair ``(c_prev, m_prev)``.

    Returns:
      ``(m, LSTMStateTuple(c, m))``.

    Raises:
      ValueError: if the input size cannot be inferred statically.
    """
    (c_prev, m_prev) = state

    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
        raise ValueError(
            "Could not infer input size from inputs.get_shape()[-1]")

    with vs.variable_scope("highway_lstm_cell",
                           initializer=self._initializer,
                           reuse=self._reuse):
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate,
        # r = transform_gate
        hidden_blocks = 5 if self.highway else 4
        with vs.variable_scope('hidden_weights'):
            hidden_matrix = linear_block_initialization(
                m_prev, [self._num_units] * hidden_blocks, bias=False)

        input_blocks = 6 if self.highway else 4
        with vs.variable_scope('input_weights'):
            input_matrix = linear_block_initialization(
                inputs, [self._num_units] * input_blocks, bias=True)

        if self.highway:
            ih, jh, fh, oh, rh = array_ops.split(
                value=hidden_matrix, num_or_size_splits=5, axis=1)
            ix, jx, fx, ox, rx, hx = array_ops.split(
                value=input_matrix, num_or_size_splits=6, axis=1)
        else:
            ih, jh, fh, oh = array_ops.split(
                value=hidden_matrix, num_or_size_splits=4, axis=1)
            ix, jx, fx, ox = array_ops.split(
                value=input_matrix, num_or_size_splits=4, axis=1)

        # The three standard gates are computed the same way in both modes.
        i = sigmoid(ih + ix)
        o = sigmoid(oh + ox)
        f = sigmoid(fh + fx + self._forget_bias)

        if self.highway:
            j = self._activation(jh + jx)
            c = f * c_prev + i * j
            t = sigmoid(rh + rx)
            lstm_out = o * self._activation(c)
            m = t * lstm_out + (1 - t) * hx
        else:
            c = i * self._activation(jh + jx) + f * c_prev
            m = o * self._activation(c)

    return m, LSTMStateTuple(c, m)
def p2():
    """Return the currently stored memory and controller states for both slots.

    Closure over `self` and `memory_state` from the enclosing scope. For
    each of the two slots the stored `cur_*` values are bundled into a
    7-tuple. If the incoming ``memory_state[0]`` has more entries than that
    ("cache mode"), its last two entries are appended to each bundle.

    Returns:
      ``(memory_states, controller_state_0, controller_state_1)`` where
      ``memory_states`` is a 2-element list and the controller states are
      ``LSTMStateTuple(cur_c[k], cur_h[k])``.
    """
    def _stored_memory(slot):
        return (self.cur_mem_content[slot], self.cur_u[slot],
                self.cur_p[slot], self.cur_L[slot], self.cur_ww[slot],
                self.cur_rw[slot], self.cur_rv[slot])

    tmp = [_stored_memory(0), _stored_memory(1)]

    if len(memory_state[0]) > len(tmp[0]):
        print('cache mode')
        # Carry the two trailing entries of the incoming state through.
        tmp[0] = _stored_memory(0) + (memory_state[0][-2],
                                      memory_state[0][-1])
        tmp[1] = _stored_memory(1) + (memory_state[1][-2],
                                      memory_state[1][-1])

    first_controller = LSTMStateTuple(self.cur_c[0], self.cur_h[0])
    second_controller = LSTMStateTuple(self.cur_c[1], self.cur_h[1])
    return tmp, first_controller, second_controller
def call(self, inputs, state):
    """Run one LSTM step whose input is augmented by attention over the premise.

    Attention scores are computed from the current input, the previous
    hidden state and the precomputed ``self._ds`` term; masked positions are
    pushed to ``tf.float32.min`` before the softmax over axis 1. The
    attention-weighted sum of ``self._premise`` is concatenated with the
    input and fed through a standard LSTM update.

    Args:
      inputs: input tensor for this step.
      state: ``(c, h)`` pair when ``self._state_is_tuple`` is set, otherwise
        a single tensor holding both halves side by side on axis 1.

    Returns:
      ``(new_h, new_state)`` where ``new_state`` uses the same packing as
      the incoming ``state``.
    """
    sigmoid = math_ops.sigmoid
    one = constant_op.constant(1, dtype=dtypes.int32)
    tuple_state = self._state_is_tuple

    if tuple_state:
        prev_c, prev_h = state
    else:
        prev_c, prev_h = array_ops.split(
            value=state, num_or_size_splits=2, axis=one)

    tile_over_premise = [1, self._premise_length, 1]

    # Project the input and the hidden state, then repeat each projection
    # along a new axis 1 of length self._premise_length.
    input_term = tf.tensordot(inputs, self._wt, axes=[[-1], [0]])
    input_term = tf.tile(tf.expand_dims(input_term, 1), tile_over_premise)
    hidden_term = tf.tensordot(prev_h, self._wm, axes=[[-1], [0]])
    hidden_term = tf.tile(tf.expand_dims(hidden_term, 1), tile_over_premise)

    scores = tf.tensordot(
        tf.nn.tanh(input_term + self._ds + hidden_term),
        self._we, axes=[[-1], [0]])
    # Positions where the mask is 0 get a hugely negative score.
    scores = scores + (1. - self._premise_mask) * tf.float32.min
    weights = tf.nn.softmax(scores, axis=1)
    attended = tf.reduce_sum(tf.multiply(weights, self._premise), axis=1)
    lstm_input = tf.concat([attended, inputs], axis=1)

    gate_inputs = math_ops.matmul(
        array_ops.concat([lstm_input, prev_h], 1), self._kernel)
    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=gate_inputs, num_or_size_splits=4, axis=one)

    forget_bias_tensor = constant_op.constant(self._forget_bias,
                                              dtype=f.dtype)
    # `add` / `multiply` are used instead of `+` / `*` because they give a
    # performance improvement, at some cost in readability.
    add = math_ops.add
    multiply = math_ops.multiply

    forget_gate = sigmoid(add(f, forget_bias_tensor))
    retained = multiply(prev_c, forget_gate)
    written = multiply(sigmoid(i), self._activation(j))
    new_c = add(retained, written)
    new_h = multiply(self._activation(new_c), sigmoid(o))

    if tuple_state:
        return new_h, LSTMStateTuple(new_c, new_h)
    return new_h, array_ops.concat([new_c, new_h], 1)
def _loop_body(self, time, memory_state, outputs, free_gates,
               allocation_gates, write_gates, read_weightings,
               write_weightings, usage_vectors, controller_state):
    """
    the body of the DNC sequence processing loop

    Reads the input for step `time`, runs ``self._step_op`` and records the
    step's output and memory-view values into the TensorArrays.

    Parameters:
    ----------
    time: Tensor
    memory_state: Tuple
    outputs: TensorArray
    free_gates: TensorArray
    allocation_gates: TensorArray
    write_gates: TensorArray
    read_weightings: TensorArray
    write_weightings: TensorArray
    usage_vectors: TensorArray
    controller_state: Tuple

    Returns: Tuple containing all updated arguments, in the same order as
        the parameters, with `time` advanced by one
    """
    step_input = self.unpacked_input_data.read(time)
    output_list = self._step_op(step_input, memory_state, controller_state)

    # update memory parameters
    # Layout of output_list as consumed below: [0:7] is the new memory
    # state, [7] the step output, [8:11] the free/allocation/write gates
    # and [11], [12] the two fields of the new controller state.
    new_memory_state = tuple(output_list[0:7])
    new_controller_state = LSTMStateTuple(output_list[11], output_list[12])

    outputs = outputs.write(time, output_list[7])

    # collecting memory view for the current step
    free_gates = free_gates.write(time, output_list[8])
    allocation_gates = allocation_gates.write(time, output_list[9])
    write_gates = write_gates.write(time, output_list[10])
    read_weightings = read_weightings.write(time, output_list[5])
    write_weightings = write_weightings.write(time, output_list[4])
    usage_vectors = usage_vectors.write(time, output_list[1])

    return (time + 1, new_memory_state, outputs, free_gates,
            allocation_gates, write_gates, read_weightings,
            write_weightings, usage_vectors, new_controller_state)
def Model(self):
    """Build the action network: conv stack -> RNN -> (linear, angular) heads.

    A training path (full sequences through ``tf.nn.dynamic_rnn``) and a
    single-step testing path (one direct cell call fed from placeholders)
    are built on the same convolutional features, RNN cell and output
    weights.

    Returns:
      ``(a, a_test, rnn_state, prev_rnn_state)``: training actions, test
      actions, the RNN state after the test step, and the placeholder(s)
      for the test-time initial state (a single ``LSTMStateTuple`` when
      ``self.n_layers == 1``, else a list with one per layer).
    """
    conv1 = model_utils.Conv2D(self.depth_input, 4, (5, 5), (4, 4),
                               scope='conv1')  # b*l, h, w, c
    conv2 = model_utils.Conv2D(conv1, 16, (5, 5), (4, 4),
                               scope='conv2')  # b*l, h, w, c
    conv3 = model_utils.Conv2D(conv2, 32, (3, 3), (2, 2),
                               scope='conv3')  # b*l, h, w, c
    shape = conv3.get_shape().as_list()
    rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)

    def small_uniform():
        # Fresh initializer object for every variable.
        return tf.initializers.random_uniform(-0.003, 0.003)

    w_linear_a = tf.get_variable('w_linear', [self.n_hidden, 1],
                                 initializer=small_uniform())
    w_angular_a = tf.get_variable('w_angular', [self.n_hidden, 1],
                                  initializer=small_uniform())
    b_linear_a = tf.get_variable('b_linear_a', [1],
                                 initializer=small_uniform())
    b_angular_a = tf.get_variable('b_angular_a', [1],
                                  initializer=small_uniform())

    def action_head(features):
        # Sigmoid-squashed linear command and tanh-squashed angular
        # command, each scaled by its entry of self.action_range.
        linear = tf.nn.sigmoid(
            tf.matmul(features, w_linear_a) + b_linear_a
        ) * self.action_range[0]  # b*l, 1
        angular = tf.nn.tanh(
            tf.matmul(features, w_angular_a) + b_angular_a
        ) * self.action_range[1]  # b*l, 1
        return tf.concat([linear, angular], axis=1)  # b*l, 2

    # training
    flat_dim = shape[1] * shape[2] * shape[3]
    depth_vectors = tf.reshape(
        conv3, (self.batch_size, self.max_steps, flat_dim))  # b, l, h
    rnn_outputs, _ = tf.nn.dynamic_rnn(rnn_cell,
                                       depth_vectors,
                                       sequence_length=self.lengths,
                                       dtype=tf.float32)  # b, l, h
    rnn_outputs_reshape = tf.reshape(rnn_outputs,
                                     [-1, self.n_hidden])  # b*l, h
    a = action_head(rnn_outputs_reshape)

    # testing
    prev_rnn_state = [
        LSTMStateTuple(
            tf.placeholder(tf.float32,
                           shape=[None, self.n_hidden],
                           name='initial_state1{0}.c'.format(layer)),
            tf.placeholder(tf.float32,
                           shape=[None, self.n_hidden],
                           name='initial_state1{0}.h'.format(layer)))
        for layer in xrange(self.n_layers)
    ]
    if self.n_layers == 1:
        prev_rnn_state = prev_rnn_state[0]

    depth_vectors_test = tf.reshape(conv3, (1, 1, flat_dim))  # b, l, h
    step_input = tf.reshape(depth_vectors_test, [-1, flat_dim])
    rnn_outputs_test, rnn_state = rnn_cell(step_input, prev_rnn_state)
    a_test = action_head(rnn_outputs_test)

    return a, a_test, rnn_state, prev_rnn_state
def _loop_body(self, time, memory_state, outputs, read_weightings,
               write_weightings, controller_state, write_vectors,
               key_vectors, beta_vectors, shift_vectors, gamma_vectors,
               gates_vectors, memory_vectors):
    """
    the body of the DNC sequence processing loop

    Reads the input for step `time`, runs ``self._step_op`` and records the
    step's output and memory-view values into the TensorArrays.

    Parameters:
    ----------
    time: Tensor
    memory_state: Tuple
    outputs: TensorArray
    read_weightings: TensorArray
    write_weightings: TensorArray
    controller_state: Tuple
    write_vectors: TensorArray
    key_vectors: TensorArray
    beta_vectors: TensorArray
    shift_vectors: TensorArray
    gamma_vectors: TensorArray
    gates_vectors: TensorArray
    memory_vectors: TensorArray

    Returns: Tuple containing all updated arguments, in the same order as
        the parameters, with `time` advanced by one
    """
    step_input = self.unpacked_input_data.read(time)
    output_list = self._step_op(step_input, memory_state, controller_state)

    # update memory parameters
    # Layout of output_list as consumed below: [0:4] is the new memory
    # state, [4] the step output, [5], [6] the two fields of the new
    # controller state and [7:13] the per-step diagnostic vectors.
    new_memory_state = tuple(output_list[0:4])
    new_controller_state = LSTMStateTuple(output_list[5], output_list[6])

    outputs = outputs.write(time, output_list[4])

    # collecting memory view for the current step
    read_weightings = read_weightings.write(time, output_list[2])
    write_weightings = write_weightings.write(time, output_list[1])
    write_vectors = write_vectors.write(time, output_list[7])
    key_vectors = key_vectors.write(time, output_list[8])
    beta_vectors = beta_vectors.write(time, output_list[9])
    shift_vectors = shift_vectors.write(time, output_list[10])
    gamma_vectors = gamma_vectors.write(time, output_list[11])
    gates_vectors = gates_vectors.write(time, output_list[12])
    memory_vectors = memory_vectors.write(time, output_list[0])

    return (time + 1, new_memory_state, outputs, read_weightings,
            write_weightings, new_controller_state, write_vectors,
            key_vectors, beta_vectors, shift_vectors, gamma_vectors,
            gates_vectors, memory_vectors)
def call(self, inputs, state):
    """Long short-term memory cell (LSTM).

    On the first call the shared `_Linear` projection is created. If
    `self._state_keep_prob` is below 1.0, the rows of its weight matrix that
    belong to the hidden state are multiplied by `self._mask_tensor` and the
    masked matrix is written back into the `_Linear` object.

    Args:
      inputs: `2-D` tensor with shape `[batch_size x input_size]`.
      state: An `LSTMStateTuple` of state tensors, each shaped
        `[batch_size x self.state_size]`, if `state_is_tuple` has been set to
        `True`.  Otherwise, a `Tensor` shaped
        `[batch_size x 2 * self.state_size]`.

    Returns:
      A pair containing the new hidden state, and the new state (either a
        `LSTMStateTuple` or a concatenated state, depending on
        `state_is_tuple`).
    """
    sigmoid = math_ops.sigmoid
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
        c, h = state
    else:
        c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

    if self._linear is None:
        self._linear = _Linear([inputs, h], 4 * self._num_units, True)
        # NOTE(review): the mask is applied once, when the projection is
        # built — confirm that this is the intended nesting.
        if self._state_keep_prob < 1.0:
            # NOTE(review): this reaches into the private `_weights`
            # attribute of `_Linear`; presumably rows are ordered
            # [input rows, state rows] — confirm against `_Linear`.
            weights = self._linear._weights
            input_size = weights.get_shape().as_list()[0] - self._num_units
            input_weights, state_weights = array_ops.split(
                weights, [input_size, self._num_units])
            # Only the recurrent (hidden-to-gates) weights are masked.
            state_weights = state_weights * self._mask_tensor
            self._linear._weights = array_ops.concat(
                [input_weights, state_weights], 0)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(value=self._linear([inputs, h]),
                                 num_or_size_splits=4,
                                 axis=1)

    new_c = (c * sigmoid(f + self._forget_bias) +
             sigmoid(i) * self._activation(j))
    new_h = self._activation(new_c) * sigmoid(o)

    # Return the state in the same packing it arrived in.
    if self._state_is_tuple:
        new_state = LSTMStateTuple(new_c, new_h)
    else:
        new_state = array_ops.concat([new_c, new_h], 1)
    return new_h, new_state
def Model(self):
    """Build the Q-value network: conv features + action -> RNN -> scalar Q.

    A training path (full sequences through ``tf.nn.dynamic_rnn``) and a
    single-step testing path (one direct cell call fed from placeholders)
    are built on the same convolutional features, RNN cell and output
    weights. In both paths the 2-element action is appended to the
    flattened conv features.

    Returns:
      ``(q, q_test, rnn_state, prev_rnn_state)``: training Q-values, test
      Q-values, the RNN state after the test step, and the placeholder(s)
      for the test-time initial state (a single ``LSTMStateTuple`` when
      ``self.n_layers == 1``, else a list with one per layer).
    """
    conv1 = model_utils.Conv2D(self.depth_input, 4, (5, 5), (4, 4),
                               scope='conv1')  # b*l, h, w, c
    conv2 = model_utils.Conv2D(conv1, 16, (5, 5), (4, 4),
                               scope='conv2')  # b*l, h, w, c
    conv3 = model_utils.Conv2D(conv2, 32, (3, 3), (2, 2),
                               scope='conv3')  # b*l, h, w, c
    shape = conv3.get_shape().as_list()
    rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)
    w_q = tf.get_variable(
        'w_q', [self.n_hidden, 1],
        initializer=tf.initializers.random_uniform(-0.003, 0.003))
    b_q = tf.get_variable(
        'b_q', [1],
        initializer=tf.initializers.random_uniform(-0.003, 0.003))

    def q_head(features):
        return tf.matmul(features, w_q) + b_q  # b*l, 1

    # training
    flat_dim = shape[1] * shape[2] * shape[3]
    depth_vectors = tf.reshape(
        conv3, (self.batch_size, self.max_steps, flat_dim),
        name='train_d_reshape')  # b, l, h*w*c
    action_input_reshape = tf.reshape(
        self.action_input, (self.batch_size, self.max_steps, 2),
        name='train_a_reshape')  # b, l, 2
    inputs = tf.concat([depth_vectors, action_input_reshape],
                       axis=2)  # b, l, h*w*c+2
    rnn_outputs, _ = tf.nn.dynamic_rnn(rnn_cell,
                                       inputs,
                                       sequence_length=self.lengths,
                                       dtype=tf.float32)  # b, l, h
    rnn_outputs_reshape = tf.reshape(rnn_outputs,
                                     [-1, self.n_hidden])  # b*l, h
    q = q_head(rnn_outputs_reshape)
    # q = tf.reshape(q, (self.batch_size, self.max_steps, 1))

    # testing
    depth_vectors_test = tf.reshape(
        conv3, (1, 1, flat_dim), name='test_d_reshape')  # b, l, h*w*c
    action_input_reshape_test = tf.reshape(
        self.action_input, (1, 1, 2), name='test_a_reshape')  # b, l, 2
    inputs_test = tf.concat(
        [depth_vectors_test, action_input_reshape_test],
        axis=2)  # b, l, h*w*c+2

    prev_rnn_state = [
        LSTMStateTuple(
            tf.placeholder(tf.float32,
                           shape=[None, self.n_hidden],
                           name='initial_state1{0}.c'.format(layer)),
            tf.placeholder(tf.float32,
                           shape=[None, self.n_hidden],
                           name='initial_state1{0}.h'.format(layer)))
        for layer in xrange(self.n_layers)
    ]
    if self.n_layers == 1:
        prev_rnn_state = prev_rnn_state[0]

    step_input = tf.reshape(inputs_test, (-1, flat_dim + 2))
    rnn_outputs_test, rnn_state = rnn_cell(step_input, prev_rnn_state)
    q_test = q_head(rnn_outputs_test)

    return q, q_test, rnn_state, prev_rnn_state
def impress(self, state_code, pre_impress_states, is_first_in_impress):
    """Advance the 'impress' RNN by exactly one step.

    A stack of ``self.impress_lay_num`` LSTM cells is driven through
    ``tf.nn.raw_rnn`` with a loop function that feeds `state_code` at time 0
    and reports every element as finished from time 1 on.

    Args:
      state_code: input fed to the RNN at the first (and only) step.
      pre_impress_states: previous per-layer states, unstacked along axis 0
        and then indexed as ``[layer][0]`` / ``[layer][1]``. Ignored when
        `is_first_in_impress` equals True.
      is_first_in_impress: when equal to True the RNN starts from its zero
        state.

    Returns:
      ``(state_impress_code, states)``: slice 0 along the leading axis of
      the emitted outputs after the ``[1, 0, 2]`` transpose, and the final
      RNN state returned by ``raw_rnn``.
    """
    with tf.variable_scope('impress', reuse=tf.AUTO_REUSE):

        def loop_fn(time, cell_output, cell_state, loop_state):
            # cell_output is None only on the initial call (time = 0).
            if cell_output is None:
                next_input = state_code
                next_state = initial_state
                emit_output = None
            else:
                next_input = cell_output
                next_state = cell_state
                emit_output = cell_output
            elements_finished = (time >= 1)
            return (elements_finished, next_input, next_state, emit_output,
                    None)

        multirnn_cell = MultiRNNCell(
            [LSTMCell(self.impress_dim)
             for _ in range(self.impress_lay_num)],
            state_is_tuple=True)

        # `initial_state` is read lazily by loop_fn when raw_rnn calls it.
        if is_first_in_impress == True:
            initial_state = multirnn_cell.zero_state(self.batch_size,
                                                     tf.float32)
        else:
            per_layer = tf.unstack(pre_impress_states, axis=0)
            initial_state = tuple(
                LSTMStateTuple(per_layer[layer][0], per_layer[layer][1])
                for layer in range(self.impress_lay_num))

        emit_ta, states, _ = tf.nn.raw_rnn(multirnn_cell, loop_fn)

        # transpose for putting batch dimension to first dimension
        batch_major = tf.transpose(emit_ta.stack(), [1, 0, 2])
        state_impress_code = batch_major[0]
        return state_impress_code, states
def call(self, inputs, state):
    """Long short-term memory cell (LSTM).

    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: An `LSTMStateTuple` of state tensors, each shaped
        `[batch_size, num_units]`, if `state_is_tuple` has been set to
        `True`.  Otherwise, a `Tensor` shaped
        `[batch_size, 2 * num_units]`.

    Returns:
      A pair containing the new hidden state, and the new state (either a
        `LSTMStateTuple` or a concatenated state, depending on
        `state_is_tuple`).
    """
    sigmoid = math_ops.sigmoid
    one = constant_op.constant(1, dtype=dtypes.int32)
    tuple_state = self._state_is_tuple

    if tuple_state:
        prev_c, prev_h = state
    else:
        prev_c, prev_h = array_ops.split(
            value=state, num_or_size_splits=2, axis=one)

    # All four gates are produced by a single matmul for efficiency.
    stacked = array_ops.concat([inputs, prev_h], 1)
    gate_inputs = math_ops.matmul(stacked, self._kernel)
    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=gate_inputs, num_or_size_splits=4, axis=one)

    forget_bias_tensor = constant_op.constant(self._forget_bias,
                                              dtype=f.dtype)
    # `add` / `multiply` are used instead of `+` / `*` because they give a
    # performance improvement, at some cost in readability.
    add = math_ops.add
    multiply = math_ops.multiply

    forget_gate = sigmoid(add(f, forget_bias_tensor))
    retained = multiply(prev_c, forget_gate)
    written = multiply(sigmoid(i), self._activation(j))
    new_c = add(retained, written)
    new_h = multiply(self._activation(new_c), sigmoid(o))

    if tuple_state:
        return new_h, LSTMStateTuple(new_c, new_h)
    return new_h, array_ops.concat([new_c, new_h], 1)
def Encoder(self):
    """Encode ``self.xs`` with a bidirectional dynamic RNN.

    Returns:
      ``(output, final_state)`` where ``output`` is the element-wise mean of
      the forward and backward outputs and ``final_state`` is an
      ``LSTMStateTuple`` whose ``c`` and ``h`` are the forward and backward
      final states concatenated on axis 1.
    """
    # One entry per batch element, each equal to self.input_timestep.
    batch_dim = tf.shape(self.xs)[0]
    inputs_length = tf.fill([batch_dim], self.input_timestep)

    # NOTE(review): the same cell object is passed for both directions.
    rnn_cell = LSTMCell(self.encoder_units)

    direction_outputs, direction_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=rnn_cell,
        cell_bw=rnn_cell,
        inputs=self.xs,
        sequence_length=inputs_length,
        dtype=self.dtype)
    forward_out, backward_out = direction_outputs
    forward_state, backward_state = direction_states

    # Outputs are averaged, final states are concatenated.
    output = tf.add(forward_out, backward_out) / 2
    merged_c = tf.concat([forward_state.c, backward_state.c], axis=1)
    merged_h = tf.concat([forward_state.h, backward_state.h], axis=1)

    return output, LSTMStateTuple(c=merged_c, h=merged_h)
def init_state(self, batch_size_tensor):
    """Create an all-zero initial ``LSTMStateTuple`` for this cell.

    Args:
      batch_size_tensor: value used as the leading dimension of both state
        tensors.

    Returns:
      ``LSTMStateTuple(c, h)`` with two zero-filled tensors (named 'c' and
      'h') whose dimension order follows ``self._data_format``.

    Raises:
      ValueError: if ``self._data_format`` is neither 'NHWC' nor 'NCHW'.
    """
    layout = self._data_format
    if layout not in ('NHWC', 'NCHW'):
        raise ValueError(
            "invalid data format. Expected one of [`NHWC`, `NCHW`], got {}"
            .format(self._data_format))

    spatial_dims = [self._out_height, self._out_width]
    if layout == 'NHWC':
        # channels last
        state_shape = [batch_size_tensor] + spatial_dims + [self._num_units]
    else:
        # channels first
        state_shape = [batch_size_tensor, self._num_units] + spatial_dims

    cell_state = tf.fill(dims=state_shape, value=0.0, name='c')
    hidden_state = tf.fill(dims=state_shape, value=0.0, name='h')
    return LSTMStateTuple(cell_state, hidden_state)
def call(self, inputs, state):
    """Run one LSTM step using the fused gate kernel.

    The gate pre-activations come from a single matmul of ``[inputs, h]``
    with ``self._kernel``; bias addition, the gate non-linearities and the
    state update are delegated to ``fused_lstm_gates``.

    Args:
      inputs: input tensor for this step.
      state: ``(c, h)`` pair when ``self._state_is_tuple`` is set, otherwise
        a single tensor holding both halves side by side on axis 1.

    Returns:
      ``(h, state)`` where ``state`` uses the same packing as the incoming
      ``state``.
    """
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
        c, h = state
    else:
        # A literal axis is used: no name `one` is defined in this method.
        c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

    gate_inputs = tf.matmul(tf.concat([inputs, h], 1), self._kernel)

    c, h = fused_lstm_gates(c,
                            gate_inputs,
                            bias=self._bias,
                            forget_bias=self._forget_bias)

    if self._state_is_tuple:
        state = LSTMStateTuple(c, h)
    else:
        state = tf.concat([c, h], 1)
    return h, state
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with convolutional gates.

    Args:
      inputs: input tensor for this step.
      state: ``(c, h)`` pair when ``self._state_is_tuple`` is set, otherwise
        one tensor with both halves stacked on axis 3.
      scope: optional variable scope; defaults to the class name.

    Returns:
      ``(new_h, new_state)`` where ``new_state`` uses the same packing as
      the incoming ``state``.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        tuple_state = self._state_is_tuple
        if tuple_state:
            prev_c, prev_h = state
        else:
            prev_c, prev_h = tf.split(state, 2, axis=3)

        # All four gates are produced by one `_conv_linear` call.
        gate_maps = _conv_linear([inputs, prev_h], self.filter_size,
                                 self.num_features * 4, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(gate_maps, 4, axis=3)

        retained = prev_c * tf.nn.sigmoid(f + self._forget_bias)
        written = tf.nn.sigmoid(i) * self._activation(j)
        new_c = retained + written
        new_h = self._activation(new_c) * tf.nn.sigmoid(o)

        if tuple_state:
            packed_state = LSTMStateTuple(new_c, new_h)
        else:
            packed_state = tf.concat([new_c, new_h], axis=3)
        return new_h, packed_state
def __call__(self, inputs, state, scope=None):
    """Run the cell with the declared zoneouts.

    The wrapped cell is evaluated first. If zoneout is enabled for the
    memory cell and/or the hidden state, each enabled part is blended with
    its previous value: through a random mask while training, or by a fixed
    ``p`` / ``1 - p`` mix otherwise. When any zoneout is active the returned
    output is the (possibly blended) hidden state.

    Args:
      inputs: input tensor for this step.
      state: pair ``(c, m)`` of previous memory cell and hidden state.
      scope: scope forwarded to the wrapped cell.

    Returns:
      ``(output, new_state)``.
    """
    # compute output and new state as before
    output, new_state = self._cell(inputs, state, scope)

    if not (self._has_hidden_state_zoneout or self._has_memory_cell_zoneout):
        return output, new_state

    def _blend(previous, current, p, enabled):
        # Mix the previous and the freshly computed value of one state part.
        if not enabled:
            return current
        if self._is_training:
            # dropout followed by `* p` undoes dropout's rescaling of the
            # kept entries (this should use plain random ops instead; see
            # the dropout code for how).
            mask = nn_ops.dropout(
                array_ops.ones_like(current), p, seed=self._seed) * p
            return ((1. - mask) * previous) + (mask * current)
        return ((1. - p) * previous) + (p * current)

    c_old, m_old = state
    c_new, m_new = new_state

    blended_c = _blend(c_old, c_new, self._memory_cell_keep_prob,
                       self._has_memory_cell_zoneout)
    blended_m = _blend(m_old, m_new, self._hidden_state_keep_prob,
                       self._has_hidden_state_zoneout)

    # package final results
    new_state = LSTMStateTuple(blended_c, blended_m)
    return new_state.h, new_state
def __call__(self, input, state, scope=None):
    """Convolutional long short-term memory cell (ConvLSTM).

    Args:
        input: Input tensor for this step; reshaped to
            ``[-1, height, width, num_units]`` before the convolution. (The
            name shadows the builtin but is kept so keyword callers still
            work.)
        state: ``(previous_memory, previous_output)`` pair; both are
            reshaped to ``[-1, height, width, num_units]``.
        scope: Optional variable scope; defaults to ``'ConvLSTMCell'``.

    Returns:
        ``(output, LSTMStateTuple(memory, output))`` with ``output`` and
        ``memory`` flattened to ``[-1, height * width * num_units]``.
    """
    with variable_scope(scope or 'ConvLSTMCell'):
        previous_memory, previous_output = state

        with variable_scope('Expand'):
            # Let reshape infer the batch dimension. Reading it from the
            # static shape fails when the batch size is unknown at graph
            # construction time, and the 'Flatten' step below already relies
            # on -1 in the same way.
            shape = [-1, self._height, self._width, self._num_units]
            input = reshape(input, shape)
            previous_memory = reshape(previous_memory, shape)
            previous_output = reshape(previous_output, shape)

        with variable_scope('Convolve'):
            # Input and previous output are stacked on the channel axis so a
            # single convolution produces all four gate pre-activations.
            x = concat(3, [input, previous_output])
            W = get_variable(
                'Weights',
                self._kernel + [2 * self._num_units, 4 * self._num_units])
            b = get_variable(
                'Biases', [4 * self._num_units],
                initializer=constant_initializer(0.0))
            y = conv2d(x, W, [1, 1, 1, 1], 'SAME') + b
            input_gate, new_input, forget_gate, output_gate = split(3, 4, y)

        with variable_scope('LSTM'):
            memory = (
                previous_memory * sigmoid(forget_gate + self._forget_bias)
                + sigmoid(input_gate) * self._activation(new_input))
            output = self._activation(memory) * sigmoid(output_gate)

        with variable_scope('Flatten'):
            shape = [-1, self._height * self._width * self._num_units]
            output = reshape(output, shape)
            memory = reshape(memory, shape)

        return output, LSTMStateTuple(memory, output)
def encoder(self):
    """Build the encoder sub-graph.

    Reads ``self.Bidirection``, ``self.num_units``, ``self.model_name``,
    ``self.encoder_inputs_embedded`` and ``self.encoder_inputs_length``.

    Sets, in both modes:
        self.encoder_outputs: per-step encoder outputs (forward and backward
            outputs concatenated along axis 2 in bidirectional mode).
        self.encoder_final_state: final encoder state (forward and backward
            ``c`` / ``h`` concatenated along axis 1 and wrapped in an
            ``LSTMStateTuple`` in bidirectional mode).
        self.hidden_units: ``num_units``, or ``2 * num_units`` in
            bidirectional mode.

    ``self.Bidirection`` is interpreted by truthiness, so a value that is
    neither ``True`` nor ``False`` still selects one of the two branches
    instead of silently building nothing.
    """
    with tf.variable_scope(self.model_name + "encoder_model"):
        if not self.Bidirection:
            encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.num_units)
            self.encoder_outputs, self.encoder_final_state = tf.nn.dynamic_rnn(
                cell=encoder_cell,
                inputs=self.encoder_inputs_embedded,
                sequence_length=self.encoder_inputs_length,
                time_major=False,
                dtype=tf.float32)
            self.hidden_units = self.num_units
        else:
            encoder_cell_fw = LSTMCell(self.num_units)
            encoder_cell_bw = LSTMCell(self.num_units)
            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_final_state, encoder_bw_final_state)) = (
                tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=encoder_cell_fw,
                    cell_bw=encoder_cell_bw,
                    inputs=self.encoder_inputs_embedded,
                    sequence_length=self.encoder_inputs_length,
                    dtype=tf.float32,
                    time_major=False))

            # Store the concatenated outputs on the instance. They used to be
            # bound to a local that was never read, which left
            # self.encoder_outputs unset in bidirectional mode.
            self.encoder_outputs = tf.concat(
                (encoder_fw_outputs, encoder_bw_outputs), 2)

            encoder_final_state_c = tf.concat(
                (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
            encoder_final_state_h = tf.concat(
                (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

            # TF tuple used by LSTM cells for state_size, zero_state, and
            # output state.
            self.encoder_final_state = LSTMStateTuple(
                c=encoder_final_state_c, h=encoder_final_state_h)
            self.hidden_units = 2 * self.num_units
def call(self, inputs, state):
    """Run one step of this layer's cell in a multi-layer stack.

    Args:
        inputs: Input tensor for this step.
        state: Sequence with one entry per layer. The entry at
            ``self._layer_pos`` is unpacked as this layer's ``(c, h)``; the
            last element of every entry is concatenated along axis 1 and fed
            to the scalar gates.

    Returns:
        ``(new_h, LSTMStateTuple(new_c, new_h))``.

    Raises:
        ValueError: If ``state`` is not a sequence.
    """
    if not nest.is_sequence(state):
        raise ValueError(
            "Expected state to be a tuple of length %d, but receive: %s"
            % (len(self.state_size), state))

    depth = len(state)
    prev_c, prev_h = state[self._layer_pos]
    stacked_h = array_ops.concat(
        [layer_state[-1] for layer_state in state], axis=1)

    # Every _linear call below shares the same initializers.
    init_kwargs = dict(
        bias_initializer=self._bias_initializer,
        kernel_initializer=self._kernel_initializer)

    with variable_scope('input-forget-output-gates'):
        gate_logits = _linear(
            [inputs, prev_h], 3 * self._num_units, True, **init_kwargs)
        in_gate, forget_gate, out_gate = array_ops.split(
            gate_logits, 3, axis=1)

    with variable_scope('scalar-gates'):
        # One scalar per layer, computed from the input and all layers' h.
        layer_gates = _linear(
            [inputs, stacked_h], depth, True, **init_kwargs)

    with variable_scope('gated-inputs'):
        # NOTE(review): the gates are broadcast against this layer's own h
        # (presumably giving batch x depth x num_units) and then flattened;
        # confirm that using only this layer's h here is intended.
        scaled_h = (array_ops.expand_dims(layer_gates, axis=2)
                    * array_ops.expand_dims(prev_h, axis=1))
        flat_scaled_h = array_ops.reshape(
            scaled_h, (-1, depth * self._num_units))
        gated_h = _linear(
            flat_scaled_h, self._num_units, True, **init_kwargs)

    with variable_scope('new-inputs'):
        projected = _linear(inputs, self._num_units, True, **init_kwargs)
        candidate = self._activation(projected + gated_h)

    retained = prev_c * math_ops.sigmoid(forget_gate + self._forget_bias)
    admitted = math_ops.sigmoid(in_gate) * candidate
    # The activation is applied to the memory itself, so the stored c is
    # already squashed; h is that value times the output gate.
    new_c = self._activation(retained + admitted)
    new_h = new_c * math_ops.sigmoid(out_gate)

    return (new_h, LSTMStateTuple(new_c, new_h))
def __call__(self, inputs, state, scope=None):
    """Convolutional Long short-term memory cell (ConvLSTM), one step.

    The input and the previous hidden state are convolved separately, each
    result is passed through ``ln`` with its own scale/shift variables, and
    the sum is split into the four gate pre-activations.

    Args:
        inputs: Input tensor for this step.
        state: Previous state. A ``(c, h)`` pair when
            ``self._state_is_tuple`` is true, otherwise one tensor split
            into ``c`` and ``h`` along axis 3.
        scope: Optional variable scope; the class name is used when omitted.

    Returns:
        ``(new_h, new_state)`` where ``new_state`` is an ``LSTMStateTuple``
        or the axis-3 concatenation of ``c`` and ``h``, mirroring the form
        of the incoming ``state``.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "ConvLSTMCell"
        if self._state_is_tuple:
            prev_c, prev_h = state
        else:
            prev_c, prev_h = array_ops.split(3, 2, state)

        gate_channels = 4 * self._num_units
        norm_shape = [self._height, self._width, gate_channels]

        # Scale (s*) and shift (b*) variables for the two ln() calls:
        # index 1 is used for the input path, index 2 for the state path.
        s1 = vs.get_variable(
            "s1", initializer=tf.ones(norm_shape), dtype=tf.float32)
        s2 = vs.get_variable(
            "s2", initializer=tf.ones(norm_shape), dtype=tf.float32)
        b1 = vs.get_variable(
            "b1", initializer=tf.zeros(norm_shape), dtype=tf.float32)
        b2 = vs.get_variable(
            "b2", initializer=tf.zeros(norm_shape), dtype=tf.float32)

        from_input = _conv(
            [inputs], gate_channels, self._k_size, False,
            initializer=self._initializer, scope="out_1")
        from_input = ln(from_input, s1, b1)

        from_state = _conv(
            [prev_h], gate_channels, self._k_size, False,
            initializer=self._initializer, scope="out_2")
        from_state = ln(from_state, s2, b2)

        gate_logits = tf.add(from_input, from_state)

        # in_gate = input gate, candidate = new input,
        # forget_gate = forget gate, out_gate = output gate
        in_gate, candidate, forget_gate, out_gate = array_ops.split(
            3, 4, gate_logits)

        retained = prev_c * sigmoid(forget_gate + self._forget_bias)
        admitted = sigmoid(in_gate) * self._activation(candidate)
        next_c = retained + admitted
        next_h = self._activation(next_c) * sigmoid(out_gate)

        if self._state_is_tuple:
            next_state = LSTMStateTuple(next_c, next_h)
        else:
            next_state = array_ops.concat(3, [next_c, next_h])
        return next_h, next_state