def call(self, inputs, state): """Gated recurrent unit (GRU) with nunits cells.""" if self._gate_linear is None: bias_ones = self._bias_initializer if self._bias_initializer is None: bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype) with vs.variable_scope("gates"): # Reset gate and update gate. self._gate_linear = _Linear( [inputs, state], 2 * self._num_units, True, bias_initializer=bias_ones, kernel_initializer=self._kernel_initializer) value = math_ops.sigmoid(self._gate_linear([inputs, state])) r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) r_state = r * state if self._candidate_linear is None: with vs.variable_scope("candidate"): self._candidate_linear = _Linear( [inputs, r_state], self._num_units, True, bias_initializer=self._bias_initializer, kernel_initializer=self._kernel_initializer) c = self._activation(self._candidate_linear([inputs, r_state])) new_h = u * state + (1 - u) * c return new_h, new_h
def call(self, inputs, state):
  sigmoid = math_ops.sigmoid
  # Parameters of gates are concatenated into one multiply for efficiency.
  c, h, e = state
  if self._linear is None:
    self._linear = _Linear([inputs, h], 5 * self._num_units, True)
  # i = input_gate, j = new_input, f = forget_gate, o = output_gate,
  # arf = mixing gate between the ordinary candidate and the e-derived one.
  i, j, f, o, arf = array_ops.split(
      value=self._linear([inputs, h]), num_or_size_splits=5, axis=1)

  with tf.variable_scope('mapeh'):
    if self._linear1 is None:
      self._linear1 = _Linear([e, h], self._num_units, True)
    e1 = self._linear1([e, h])

  new_c = (
      c * sigmoid(f + self._forget_bias) +
      sigmoid(i) * (sigmoid(arf) * self._activation(j) +
                    (1 - sigmoid(arf)) * self._activation(e1)))
  new_h = self._activation(new_c) * sigmoid(o)
  new_state = EncryptLSTMStateTuple(new_c, new_h, e)
  return new_h, new_state
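# Illustration only: the candidate mixing in the cell above, reduced to NumPy.
# The learned gate a = sigmoid(arf) interpolates between the ordinary
# candidate tanh(j) and the auxiliary candidate tanh(e1).
import numpy as np

def mixed_candidate_sketch(j, e1, arf):
    a = 1.0 / (1.0 + np.exp(-arf))
    return a * np.tanh(j) + (1.0 - a) * np.tanh(e1)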
def call(self, inputs, state, att_score=None):
  if self._gate_linear is None:
    bias_ones = self._bias_initializer
    if self._bias_initializer is None:
      bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
    with vs.variable_scope("gates"):  # Reset gate and update gate.
      self._gate_linear = _Linear(
          [inputs, state],
          2 * self._num_units,
          True,
          bias_initializer=bias_ones,
          kernel_initializer=self._kernel_initializer)

  value = math_ops.sigmoid(self._gate_linear([inputs, state]))
  r, u = array_ops.split(value, num_or_size_splits=2, axis=1)

  r_state = r * state
  if self._candidate_linear is None:
    with vs.variable_scope("candidate"):
      self._candidate_linear = _Linear(
          [inputs, r_state],
          self._num_units,
          True,
          bias_initializer=self._bias_initializer,
          kernel_initializer=self._kernel_initializer)
  c = self._activation(self._candidate_linear([inputs, r_state]))
  # AUGRU-style update: the attention score rescales the update gate.
  u = (1.0 - att_score) * u
  new_h = u * state + (1 - u) * c
  return new_h, new_h
def call(self, inputs, state): with vs.variable_scope("map"): self._map_linear = _Linear( [inputs], self._num_units, True, bias_initializer=self._bias_initializer, kernel_initializer=self._kernel_initializer) z = self._activation(self._map_linear([inputs])) u_inputs = array_ops.concat([z, state], 1) with vs.variable_scope("gate"): self._gate_linear = _Linear( [u_inputs], self._num_units, True, bias_initializer=self._bias_initializer if self._bias_initializer is not None else init_ops.constant_initializer(1., dtype=self.dtype), kernel_initializer=self._kernel_initializer) u = math_ops.sigmoid(self._gate_linear([u_inputs])) new_state = u * state + (1. - u) * z return new_state, new_state
def call(self, inputs, state, att_score=None): """Gated recurrent unit (GRU) with nunits cells.""" if self._gate_linear is None: bias_ones = self._bias_initializer if self._bias_initializer is None: bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype) with vs.variable_scope("gates"): # Reset gate and update gate. self._gate_linear = _Linear( [inputs, state], 2 * self._num_units, True, bias_initializer=bias_ones, kernel_initializer=self._kernel_initializer) value = math_ops.sigmoid(self._gate_linear([inputs, state])) r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) r_state = r * state if self._candidate_linear is None: with vs.variable_scope("candidate"): self._candidate_linear = _Linear( [inputs, r_state], self._num_units, True, bias_initializer=self._bias_initializer, kernel_initializer=self._kernel_initializer) c = self._activation(self._candidate_linear([inputs, r_state])) new_h = (1. - att_score) * state + att_score * c return new_h, new_h
def __call__(self, inputs, state, scope=None):
  """Run one step of minimal RNN.

  Args:
    inputs: input Tensor, 2D, batch x num_units.
    state: a state Tensor, `2-D, batch x state_size`.

  Returns:
    A tuple containing:
    - A `2-D, [batch x num_units]`, Tensor representing the output of the
      cell after reading `inputs` when previous state was `state`.
    - A `2-D, [batch x num_units]`, Tensor representing the new state of
      cell after reading `inputs` when the previous state was `state`.
      Same type and shape(s) as `state`.

  Raises:
    ValueError:
    - If input size cannot be inferred from inputs via static shape
      inference.
    - If state is not `2D`.
  """
  # Phi projection to a latent space / candidate
  if self._phi is None:
    with tf.variable_scope("candidate"):
      if self._phi_initializer is not None:
        self._phi = self._phi_initializer(
            inputs,
            self._num_units,
            bias_initializer=self._bias_initializer,
            kernel_initializer=self._kernel_initializer)
      else:
        self._phi = _Linear(
            inputs,
            self._num_units,
            True,
            bias_initializer=self._bias_initializer,
            kernel_initializer=self._kernel_initializer)
  z = self._activation(self._phi(inputs))

  # Update gate
  if self._gate_linear is None:
    bias_ones = self._bias_initializer
    if self._bias_initializer is None:
      bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
    with tf.variable_scope("update_gate"):
      self._gate_linear = _Linear(
          [state, z],
          self._num_units,
          True,
          bias_initializer=bias_ones,
          kernel_initializer=self._kernel_initializer)
  u = math_ops.sigmoid(self._gate_linear([state, z]))

  # Activation step
  new_h = u * state + (1 - u) * z
  return new_h, new_h
def call(self, inputs, state):
  def replace_w(x):
    # Quantize kernel weights to self._w_bit bits; leave biases untouched.
    if x.op.name.endswith('kernel'):
      return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
    else:
      return x

  with bit_utils.replace_variable(replace_w):
    sigmoid = tf.sigmoid
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
      c, h = state
    else:
      c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

    if self._linear is None:
      self._linear = rnn_cell_impl._Linear([inputs, h], 4 * self._num_units, True)
    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = tf.split(
        value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)

    new_c = (
        c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
    new_h = bit_utils.round_bit(
        self._activation(new_c) * sigmoid(o), bit=self._f_bit)

    if self._state_is_tuple:
      new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)
    else:
      new_state = tf.concat([new_c, new_h], 1)
  return new_h, new_state
def call(self, inputs, state): """Gated recurrent unit (GRU) with nunits cells.""" # inputs = realinputs + m +rt # rt's length is self._num_units # state = rt * older state # input = first 2 part totalLength = inputs.get_shape().as_list()[1] inputs_ = inputs[:, 0:totalLength - self._num_units] rth = inputs[:, totalLength - self._num_units:] inputs = inputs_ state = math_ops.multiply(rth, state) if self._gate_linear is None: bias_ones = self._bias_initializer if self._bias_initializer is None: bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype) with vs.variable_scope("gates"): # Reset gate and update gate. self._gate_linear = _Linear( [inputs, state], 2 * self._num_units, True, bias_initializer=bias_ones, kernel_initializer=self._kernel_initializer) value = math_ops.sigmoid(self._gate_linear([inputs, state])) r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) r_state = r * state if self._candidate_linear is None: with vs.variable_scope("candidate"): self._candidate_linear = _Linear( [inputs, r_state], self._num_units, True, bias_initializer=self._bias_initializer, kernel_initializer=self._kernel_initializer) c = self._activation(self._candidate_linear([inputs, r_state])) new_h = u * state + (1 - u) * c return new_h, new_h
def call(self, inputs, state):
  def replace_w(x):
    if x.op.name.endswith('kernel'):
      return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
    else:
      return x

  with bit_utils.replace_variable(replace_w):
    if self._gate_linear is None:
      bias_ones = self._bias_initializer
      if self._bias_initializer is None:
        bias_ones = tf.constant_initializer(1.0, dtype=inputs.dtype)
      with tf.variable_scope("gates"):  # Reset gate and update gate.
        self._gate_linear = rnn_cell_impl._Linear(
            [inputs, state],
            2 * self._num_units,
            True,
            bias_initializer=bias_ones,
            kernel_initializer=self._kernel_initializer)

    value = tf.sigmoid(self._gate_linear([inputs, state]))
    r, u = tf.split(value=value, num_or_size_splits=2, axis=1)

    r_state = bit_utils.round_bit(r * state, bit=self._f_bit)
    if self._candidate_linear is None:
      with tf.variable_scope("candidate"):
        self._candidate_linear = rnn_cell_impl._Linear(
            [inputs, r_state],
            self._num_units,
            True,
            bias_initializer=self._bias_initializer,
            kernel_initializer=self._kernel_initializer)
    c = self._activation(self._candidate_linear([inputs, r_state]))
    c = bit_utils.round_bit(c, bit=self._f_bit)
    new_h = bit_utils.round_bit(u * state + (1 - u) * c, bit=self._f_bit)
  return new_h, new_h
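# Illustration only: bit_utils is an external helper, so as an assumption
# about what a k-bit activation rounder like round_bit does, here is a
# hypothetical fixed-point stand-in that snaps values in [0, 1] onto a
# uniform grid of 2**bit levels.
import numpy as np

def round_to_bits_sketch(x, bit):
    """Hypothetical stand-in: round x (in [0, 1]) to 2**bit uniform levels."""
    scale = 2.0 ** bit - 1.0
    return np.round(np.clip(x, 0.0, 1.0) * scale) / scale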
def call(self, inputs, state): """Long short-term memory cell (LSTM). Args: inputs: `2-D` tensor with shape `[batch_size x input_size]`. state: An `LSTMStateTuple` of state tensors, each shaped `[batch_size x self.state_size]`, if `state_is_tuple` has been set to `True`. Otherwise, a `Tensor` shaped `[batch_size x 2 * self.state_size]`. Returns: A pair containing the new hidden state, and the new state (either a `LSTMStateTuple` or a concatenated state, depending on `state_is_tuple`). """ sigmoid = tf.sigmoid self._step = self._step + 1 # Parameters of gates are concatenated into one multiply for efficiency. if self._state_is_tuple: c, h = state else: c, h = tf.split(value=state, num_or_size_splits=2, axis=1) if self._linear is None: self._linear = _Linear([inputs, h], 4 * self._num_units, True) i, j, f, o = tf.split(value=self._linear([inputs, h]), num_or_size_splits=4, axis=1) new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j)) new_h_cnt = self._activation(new_c) * sigmoid(o) if self._step % self._skip_size == 0: w_h_skip, b_h_skip = self.weight_bias( [self._num_units, self._num_units], [self._num_units]) new_h_skip = sigmoid(tf.matmul(h, w_h_skip) + b_h_skip) masked_w1, masked_w2 = self.masked_weight(_load=False) new_h = new_h_cnt * masked_w1 + new_h_skip * masked_w2 else: new_h = new_h_cnt if self._state_is_tuple: new_state = LSTMStateTuple(new_c, new_h) else: new_state = tf.concat([new_c, new_h], 1) return new_h, new_state
def call(self, inputs, state):
  if self._gate_linear is None:
    with tf.variable_scope("gates"):
      self._gate_linear = _Linear(
          [inputs, state],
          2 * self._num_units,
          True,
          kernel_initializer=tf.orthogonal_initializer(1.0),
          bias_initializer=tf.constant_initializer(1.0))

  value = tf.sigmoid(self._gate_linear([inputs, state]))
  r, u = tf.split(value=value, num_or_size_splits=2, axis=1)

  r_state = r * state
  if self._candidate_linear is None:
    with tf.variable_scope("candidate"):
      self._candidate_linear = _Linear(
          [inputs, r_state],
          self._num_units,
          True,
          kernel_initializer=tf.orthogonal_initializer(1.0),
          bias_initializer=tf.constant_initializer(-1.0))
  c = tf.nn.tanh(self._candidate_linear([inputs, r_state]))
  new_h = u * state + (1 - u) * c
  return new_h, new_h
def call(self, inputs, state): """ Basic RNN: output = new_state = act(W * input + U * state + B). The implentation is similar to `BasicRNNCell.call()` in tensorflow/python/ops/rnn_cell_impl.py, but we pass the kernel initializer to `_Linear()` """ if self._linear is None: self._linear = _Linear([inputs, state], self._num_units, build_bias=True, kernel_initializer=self._kernel_initializer) output = self._activation(self._linear([inputs, state])) return output, output
def call(self, inputs, state): """Long short-term memory cell (LSTM). Args: inputs: `2-D` tensor with shape `[batch_size x input_size]`. state: An `LSTMStateTuple` of state tensors, each shaped `[batch_size x self.state_size]`, if `state_is_tuple` has been set to `True`. Otherwise, a `Tensor` shaped `[batch_size x 2 * self.state_size]`. Returns: A pair containing the new hidden state, and the new state (either a `LSTMStateTuple` or a concatenated state, depending on `state_is_tuple`). """ sigmoid = tf.sigmoid # Parameters of gates are concatenated into one multiply for efficiency. if self._state_is_tuple: c, h = state else: c, h = tf.split(value=state, num_or_size_splits=2, axis=1) if self._linear is None: self._linear = _Linear([inputs, h], 4 * self._num_units, True) if self.mode == tf.estimator.ModeKeys.TRAIN: mask = tf.ones_like(self._linear._weights) mask_1, mask_2 = tf.split(mask, num_or_size_splits=2, axis=1) mask_2 = tf.nn.dropout( mask_2, keep_prob=self.weight_keep_drop) * self.weight_keep_drop mask = tf.concat([mask_1, mask_2], axis=1) self._linear._weights = self._linear._weights * mask # i = input_gate, j = new_input, f = forget_gate, o = output_gate i, j, f, o = tf.split(value=self._linear([inputs, h]), num_or_size_splits=4, axis=1) new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j)) new_h = self._activation(new_c) * sigmoid(o) if self._state_is_tuple: new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h) else: new_state = tf.concat([new_c, new_h], 1) return new_h, new_state
def call(self, inputs, state): """Long short-term memory cell (LSTM). Args: inputs: `2-D` tensor with shape `[batch_size x input_size]`. state: An `LSTMStateTuple` of state tensors, each shaped `[batch_size x self.state_size]`, if `state_is_tuple` has been set to `True`. Otherwise, a `Tensor` shaped `[batch_size x 2 * self.state_size]`. Returns: A pair containing the new hidden state, and the new state (either a `LSTMStateTuple` or a concatenated state, depending on `state_is_tuple`). """ sigmoid = math_ops.sigmoid # Parameters of gates are concatenated into one multiply for efficiency. if self._state_is_tuple: c, h = state else: c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1) if self._linear is None: self._linear = _Linear([inputs, h], 4 * self._num_units, True) if self._state_keep_prob < 1.0: weights = self._linear._weights input_size = weights.get_shape().as_list()[0] - self._num_units input_weights, state_weights = array_ops.split( weights, [input_size, self._num_units]) state_weights = state_weights * self._mask_tensor self._linear._weights = array_ops.concat( [input_weights, state_weights], 0) # i = input_gate, j = new_input, f = forget_gate, o = output_gate i, j, f, o = array_ops.split(value=self._linear([inputs, h]), num_or_size_splits=4, axis=1) new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j)) new_h = self._activation(new_c) * sigmoid(o) if self._state_is_tuple: new_state = LSTMStateTuple(new_c, new_h) else: new_state = array_ops.concat([new_c, new_h], 1) return new_h, new_state
def call(self, inputs, state):
  sigmoid = math_ops.sigmoid
  # Parameters of gates are concatenated into one multiply for efficiency.
  c, h, e = state
  if self._linear is None:
    self._linear = _Linear([inputs, h, e], 4 * self._num_units, True)
  # i = input_gate, j = new_input, f = forget_gate, o = output_gate
  i, j, f, o = array_ops.split(
      value=self._linear([inputs, h, e]), num_or_size_splits=4, axis=1)

  new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
  new_h = self._activation(new_c) * sigmoid(o)
  # The auxiliary state component e is passed through unchanged.
  new_state = EncryptLSTMStateTuple(new_c, new_h, e)
  return new_h, new_state
def call(self, inputs, state, att_score=None):
  # The last two input columns carry the time-interval features.
  time_now_score = tf.expand_dims(inputs[:, -1], -1)
  time_last_score = tf.expand_dims(inputs[:, -2], -1)
  inputs = inputs[:, :-2]
  inputs = inputs * att_score

  num_proj = self._num_units if self._num_proj is None else self._num_proj
  sigmoid = math_ops.sigmoid

  if self._state_is_tuple:
    (c_prev, m_prev) = state
  else:
    c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
    m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

  dtype = inputs.dtype
  input_size = inputs.get_shape().with_rank(2)[1]
  if input_size.value is None:
    raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

  if self._time_kernel_w1 is None:
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
      with vs.variable_scope(unit_scope):
        self._time_input_w1 = vs.get_variable(
            "_time_input_w1", shape=[self._num_units], dtype=dtype)
        self._time_input_bias1 = vs.get_variable(
            "_time_input_bias1", shape=[self._num_units], dtype=dtype)
        self._time_input_w2 = vs.get_variable(
            "_time_input_w2", shape=[self._num_units], dtype=dtype)
        self._time_input_bias2 = vs.get_variable(
            "_time_input_bias2", shape=[self._num_units], dtype=dtype)
        self._time_kernel_w1 = vs.get_variable(
            "_time_kernel_w1", shape=[input_size, self._num_units], dtype=dtype)
        self._time_kernel_t1 = vs.get_variable(
            "_time_kernel_t1", shape=[self._num_units, self._num_units],
            dtype=dtype)
        self._time_bias1 = vs.get_variable(
            "_time_bias1", shape=[self._num_units], dtype=dtype)
        self._time_kernel_w2 = vs.get_variable(
            "_time_kernel_w2", shape=[input_size, self._num_units], dtype=dtype)
        self._time_kernel_t2 = vs.get_variable(
            "_time_kernel_t2", shape=[self._num_units, self._num_units],
            dtype=dtype)
        self._time_bias2 = vs.get_variable(
            "_time_bias2", shape=[self._num_units], dtype=dtype)
        self._o_kernel_t1 = vs.get_variable(
            "_o_kernel_t1", shape=[self._num_units, self._num_units],
            dtype=dtype)
        self._o_kernel_t2 = vs.get_variable(
            "_o_kernel_t2", shape=[self._num_units, self._num_units],
            dtype=dtype)

  time_now_input = tf.nn.tanh(
      time_now_score * self._time_input_w1 + self._time_input_bias1)
  time_last_input = tf.nn.tanh(
      time_last_score * self._time_input_w2 + self._time_input_bias2)
  time_now_state = (
      math_ops.matmul(inputs, self._time_kernel_w1) +
      math_ops.matmul(time_now_input, self._time_kernel_t1) + self._time_bias1)
  time_last_state = (
      math_ops.matmul(inputs, self._time_kernel_w2) +
      math_ops.matmul(time_last_input, self._time_kernel_t2) + self._time_bias2)

  if self._linear1 is None:
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
      if self._num_unit_shards is not None:
        unit_scope.set_partitioner(
            partitioned_variables.fixed_size_partitioner(
                self._num_unit_shards))
      self._linear1 = _Linear([inputs, m_prev], 4 * self._num_units, True)

  # i = input_gate, j = new_input, f = forget_gate, o = output_gate
  lstm_matrix = self._linear1([inputs, m_prev])
  i, j, f, o = array_ops.split(
      value=lstm_matrix, num_or_size_splits=4, axis=1)
  o = (o + math_ops.matmul(time_now_input, self._o_kernel_t1) +
       math_ops.matmul(time_last_input, self._o_kernel_t2))

  # Diagonal connections
  if self._use_peepholes and not self._w_f_diag:
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
      with vs.variable_scope(unit_scope):
        self._w_f_diag = vs.get_variable(
            "w_f_diag", shape=[self._num_units], dtype=dtype)
        self._w_i_diag = vs.get_variable(
            "w_i_diag", shape=[self._num_units], dtype=dtype)
        self._w_o_diag = vs.get_variable(
            "w_o_diag", shape=[self._num_units], dtype=dtype)

  if self._use_peepholes:
    c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) *
         sigmoid(time_last_state) * c_prev +
         sigmoid(i + self._w_i_diag * c_prev) *
         sigmoid(time_now_state) * self._activation(j))
  else:
    c = (sigmoid(f + self._forget_bias) * sigmoid(time_last_state) * c_prev +
         sigmoid(i) * sigmoid(time_now_state) * self._activation(j))

  if self._cell_clip is not None:
    # pylint: disable=invalid-unary-operand-type
    c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
    # pylint: enable=invalid-unary-operand-type

  if self._use_peepholes:
    m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
  else:
    m = sigmoid(o) * self._activation(c)

  if self._num_proj is not None:
    if self._linear2 is None:
      scope = vs.get_variable_scope()
      with vs.variable_scope(scope, initializer=self._initializer):
        with vs.variable_scope("projection") as proj_scope:
          if self._num_proj_shards is not None:
            proj_scope.set_partitioner(
                partitioned_variables.fixed_size_partitioner(
                    self._num_proj_shards))
          self._linear2 = _Linear(m, self._num_proj, False)
    m = self._linear2(m)

    if self._proj_clip is not None:
      # pylint: disable=invalid-unary-operand-type
      m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
      # pylint: enable=invalid-unary-operand-type

  new_state = (LSTMStateTuple(c, m)
               if self._state_is_tuple else array_ops.concat([c, m], 1))
  return m, new_state
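# Illustration only: the extra time gating in the cell above, reduced to
# NumPy (peepholes omitted). Relative to a standard LSTM, the forget and
# input paths are each scaled by a learned function of the elapsed-time
# features T_last and T_now.
import numpy as np

def sig(x):
    return 1.0 / (1.0 + np.exp(-x))

def time_gated_cell_update_sketch(c_prev, i, j, f, T_now, T_last, forget_bias=1.0):
    return (sig(f + forget_bias) * sig(T_last) * c_prev +
            sig(i) * sig(T_now) * np.tanh(j))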
def call(self, inputs, state):
  num_proj = self._num_units if self._num_proj is None else self._num_proj
  sigmoid = math_ops.sigmoid
  (c_prev, m_prev) = state

  dtype = inputs.dtype
  input_size = inputs.get_shape().with_rank(2)[1]
  if input_size.value is None:
    raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

  if self._linear1 is None:
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
      if self._num_unit_shards is not None:
        unit_scope.set_partitioner(
            partitioned_variables.fixed_size_partitioner(
                self._num_unit_shards))
      self._linear1 = _Linear([inputs, m_prev], 4 * self._num_units, True)

  # i = input_gate, j = new_input, f = forget_gate, o = output_gate
  lstm_matrix = self._linear1([inputs, m_prev])
  i, j, f, o = array_ops.split(
      value=lstm_matrix, num_or_size_splits=4, axis=1)

  # Diagonal connections
  if self._use_peepholes and not self._w_f_diag:
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
      with vs.variable_scope(unit_scope):
        self._w_f_diag = vs.get_variable(
            "w_f_diag", shape=[self._num_units], dtype=dtype)
        self._w_i_diag = vs.get_variable(
            "w_i_diag", shape=[self._num_units], dtype=dtype)
        self._w_o_diag = vs.get_variable(
            "w_o_diag", shape=[self._num_units], dtype=dtype)

  if self._use_peepholes:
    c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
         sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
  else:
    c = (sigmoid(f + self._forget_bias) * c_prev +
         sigmoid(i) * self._activation(j))

  if self._cell_clip is not None:
    # pylint: disable=invalid-unary-operand-type
    c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
    # pylint: enable=invalid-unary-operand-type

  if self._use_peepholes:
    m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
  else:
    m = sigmoid(o) * self._activation(c)

  if self._num_proj is not None:
    if self._linear2 is None:
      scope = vs.get_variable_scope()
      with vs.variable_scope(scope, initializer=self._initializer):
        with vs.variable_scope("projection") as proj_scope:
          if self._num_proj_shards is not None:
            proj_scope.set_partitioner(
                partitioned_variables.fixed_size_partitioner(
                    self._num_proj_shards))
          self._linear2 = _Linear(m, self._num_proj, False)
    m = self._linear2(m)

    if self._proj_clip is not None:
      # pylint: disable=invalid-unary-operand-type
      m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
      # pylint: enable=invalid-unary-operand-type

  new_state = (LSTMStateTuple(c, m)
               if self._state_is_tuple else array_ops.concat([c, m], 1))
  return m, new_state
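# Illustration only: the projected, peephole LSTM step above in NumPy for a
# single example, with hypothetical parameters standing in for the TF
# variables (W: joint kernel, b: bias, w_i/w_f/w_o: peephole vectors,
# W_proj: projection matrix).
import numpy as np

def sig(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_proj_step_sketch(x, c, m, W, b, w_i, w_f, w_o, W_proj, forget_bias=1.0):
    """x: [input_size], c: [num_units], m: [num_proj]."""
    n = c.shape[0]
    z = np.concatenate([x, m]) @ W + b
    i, j, f, o = z[:n], z[n:2*n], z[2*n:3*n], z[3*n:]
    c_new = sig(f + forget_bias + w_f * c) * c + sig(i + w_i * c) * np.tanh(j)
    m_new = sig(o + w_o * c_new) * np.tanh(c_new)
    return c_new, m_new @ W_proj   # project the output / recurrent state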