Example no. 1
  def call(self, inputs, state):
    """Gated recurrent unit (GRU) with nunits cells."""
    if self._gate_linear is None:
      bias_ones = self._bias_initializer
      if self._bias_initializer is None:
        bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
      with vs.variable_scope("gates"):  # Reset gate and update gate.
        self._gate_linear = _Linear(
            [inputs, state],
            2 * self._num_units,
            True,
            bias_initializer=bias_ones,
            kernel_initializer=self._kernel_initializer)

    value = math_ops.sigmoid(self._gate_linear([inputs, state]))
    r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

    r_state = r * state
    if self._candidate_linear is None:
      with vs.variable_scope("candidate"):
        self._candidate_linear = _Linear(
            [inputs, r_state],
            self._num_units,
            True,
            bias_initializer=self._bias_initializer,
            kernel_initializer=self._kernel_initializer)
    c = self._activation(self._candidate_linear([inputs, r_state]))
    new_h = u * state + (1 - u) * c
    return new_h, new_h
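For reference, the update this cell computes can be written out directly. Below is a minimal NumPy sketch of the same GRU equations (reset gate r, update gate u, candidate c), assuming the default tanh activation; the weight shapes and helper names are illustrative assumptions, not part of the original code.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x, h, w_gate, b_gate, w_cand, b_cand):
    # w_gate: [input_size + units, 2 * units], w_cand: [input_size + units, units]
    units = h.shape[1]
    gates = sigmoid(np.concatenate([x, h], axis=1) @ w_gate + b_gate)
    r, u = gates[:, :units], gates[:, units:]                           # reset / update gates
    c = np.tanh(np.concatenate([x, r * h], axis=1) @ w_cand + b_cand)   # candidate
    return u * h + (1.0 - u) * c                                        # new hidden state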
Example no. 2
    def call(self, inputs, state):
        sigmoid = math_ops.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        c, h, e = state
        
        if self._linear is None:
            self._linear = _Linear([inputs, h], 5 * self._num_units, True)
            #self._linear = _Linear([inputs, h], 4 * self._num_units, True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate,
        # arf = blending gate between the new input j and the mapped e-state
        i, j, f, o, arf = array_ops.split(
            value=self._linear([inputs, h]), num_or_size_splits=5, axis=1)

        with tf.variable_scope('mapeh'):
            if self._linear1 is None:
                self._linear1 = _Linear([e, h], self._num_units, True)
            e1 = array_ops.split(
                value=self._linear1([e, h]), num_or_size_splits=1, axis=0)[0]
            
        new_c = (
            c * sigmoid(f + self._forget_bias)
            + sigmoid(i) * (sigmoid(arf) * self._activation(j)
                            + (1 - sigmoid(arf)) * self._activation(e1))
        )
        new_h = self._activation(new_c) * sigmoid(o)         
            
        new_state = EncryptLSTMStateTuple(new_c, new_h, e)
        
        return new_h, new_state
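The fifth gate split off above (arf) interpolates between the usual candidate input j and the mapped extra state e1. A minimal sketch of just that blend, with hypothetical array arguments that are not part of the original cell, assuming tanh as the activation:

import numpy as np

def blended_candidate(arf_logits, j, e1):
    # sigmoid(arf) * tanh(j) + (1 - sigmoid(arf)) * tanh(e1)
    a = 1.0 / (1.0 + np.exp(-arf_logits))
    return a * np.tanh(j) + (1.0 - a) * np.tanh(e1)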
Example no. 3
    def call(self, inputs, state, att_score=None):
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(1.0,
                                                          dtype=inputs.dtype)
            with vs.variable_scope("gates"):
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value, num_or_size_splits=2, axis=1)
        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)

        c = self._activation(self._candidate_linear([inputs, r_state]))
        u = (1.0 - att_score) * u
        new_h = u * state + (1 - u) * c
        return new_h, new_h
Example no. 4
    def call(self, inputs, state, att_score=None):
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
            with vs.variable_scope("gates"):
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer
                )

        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value, num_or_size_splits=2, axis=1)
        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer
                )

        c = self._activation(self._candidate_linear([inputs, r_state]))
        u = (1.0 - att_score) * u
        new_h = u * state + (1 - u) * c
        return new_h, new_h
Example no. 5
    def call(self, inputs, state):
        with vs.variable_scope("map"):
            self._map_linear = _Linear(
                [inputs],
                self._num_units,
                True,
                bias_initializer=self._bias_initializer,
                kernel_initializer=self._kernel_initializer)

        z = self._activation(self._map_linear([inputs]))
        u_inputs = array_ops.concat([z, state], 1)

        with vs.variable_scope("gate"):
            self._gate_linear = _Linear(
                [u_inputs],
                self._num_units,
                True,
                bias_initializer=self._bias_initializer
                if self._bias_initializer is not None
                else init_ops.constant_initializer(1., dtype=self.dtype),
                kernel_initializer=self._kernel_initializer)

        u = math_ops.sigmoid(self._gate_linear([u_inputs]))

        new_state = u * state + (1. - u) * z

        return new_state, new_state
Example no. 6
    def call(self, inputs, state, att_score=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
            with vs.variable_scope("gates"):  # Reset gate and update gate.
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)
        c = self._activation(self._candidate_linear([inputs, r_state]))
        new_h = (1. - att_score) * state + att_score * c
        return new_h, new_h
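Unlike the plain GRU in Example no. 1, this variant discards the learned update gate in the final interpolation and blends state and candidate with the external attention score instead. A small sketch of that blend with made-up values; broadcasting att_score over the unit axis is an assumption:

import numpy as np

state = np.zeros((2, 4))                 # previous hidden state, [batch, units]
c = np.ones((2, 4))                      # GRU candidate, [batch, units]
att_score = np.array([[0.2], [0.9]])     # per-example attention, [batch, 1]
new_h = (1.0 - att_score) * state + att_score * c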
Example no. 7
    def __call__(self, inputs, state, scope=None):
        """Run one step of minimal RNN.
          Args:
            inputs: input Tensor, 2D, batch x num_units.
            state: a state Tensor, `2-D, batch x state_size`.
          Returns:
            A tuple containing:
            - A `2-D, [batch x num_units]`, Tensor representing the output of the
              cell after reading `inputs` when previous state was `state`.
            - A `2-D, [batch x num_units]`, Tensor representing the new state of cell after reading `inputs` when
              the previous state was `state`.  Same type and shape(s) as `state`.
          Raises:
            ValueError:
            - If input size cannot be inferred from inputs via
              static shape inference.
            - If state is not `2D`.
        """

        # Phi projection to a latent space / candidate
        if self._phi is None:
            with tf.variable_scope("candidate"):
                if self._phi_initializer is not None:
                    self._phi = self._phi_initializer(
                        inputs,
                        self._num_units,
                        bias_initializer=self._bias_initializer,
                        kernel_initializer=self._kernel_initializer)
                else:
                    self._phi = _Linear(
                        inputs,
                        self._num_units,
                        True,
                        bias_initializer=self._bias_initializer,
                        kernel_initializer=self._kernel_initializer)

        z = self._activation(self._phi(inputs))

        # Update gate
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(1.0,
                                                          dtype=inputs.dtype)
            with tf.variable_scope("update_gate"):
                self._gate_linear = _Linear(
                    [state, z],
                    self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        u = math_ops.sigmoid(self._gate_linear([state, z]))

        # Activation step
        new_h = u * state + (1 - u) * z

        return new_h, new_h
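The cell above follows the MinimalRNN pattern: project the input to a latent candidate z, then apply a single update gate over [state, z]. A NumPy sketch of the same two steps, with assumed weight shapes and tanh as the activation:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def minimal_rnn_step(x, h, w_phi, b_phi, w_u, b_u):
    z = np.tanh(x @ w_phi + b_phi)                             # latent candidate
    u = sigmoid(np.concatenate([h, z], axis=1) @ w_u + b_u)    # update gate
    return u * h + (1.0 - u) * z                               # new state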
Example no. 8
    def call(self, inputs, state):
        def replace_w(x):
            if x.op.name.endswith('kernel'):
                return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
            else:
                return x

        with bit_utils.replace_variable(replace_w):
            sigmoid = tf.sigmoid
            # Parameters of gates are concatenated into one multiply for
            # efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

            if self._linear is None:
                self._linear = rnn_cell_impl._Linear(
                    [inputs, h], 4 * self._num_units, True)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = tf.split(
                value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)

            new_c = (
                c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
            new_h = bit_utils.round_bit(self._activation(
                new_c) * sigmoid(o), bit=self._f_bit)

            if self._state_is_tuple:
                new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)
            else:
                new_state = tf.concat([new_c, new_h], 1)
        return new_h, new_state
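The bit_utils module used above is not included in this listing, so its exact behaviour is unknown. As a loose assumption, an activation-rounding helper in the spirit of round_bit might snap values onto a fixed grid, e.g.:

import numpy as np

def round_bit(x, bit):
    # Assumed sketch only: snap values (expected in [0, 1]) onto a grid of 2**bit levels.
    # The real bit_utils.round_bit may be implemented differently.
    scale = 2 ** bit - 1
    return np.round(x * scale) / scale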
Example no. 9
    def call(self, inputs, state):
        """Gated recurrent unit (GRU) with nunits cells."""
        # inputs = [real inputs, m, rt], concatenated along the feature axis
        # rt has length self._num_units
        # state is decayed element-wise: state = rt * previous state
        # only the first two parts (real inputs and m) are fed to the gates
        totalLength = inputs.get_shape().as_list()[1]
        inputs_ = inputs[:, 0:totalLength - self._num_units]
        rth = inputs[:, totalLength - self._num_units:]
        inputs = inputs_
        state = math_ops.multiply(rth, state)
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(1.0,
                                                          dtype=inputs.dtype)
            with vs.variable_scope("gates"):  # Reset gate and update gate.
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)
        c = self._activation(self._candidate_linear([inputs, r_state]))
        new_h = u * state + (1 - u) * c
        return new_h, new_h
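This GRU variant expects the last self._num_units columns of inputs to carry a decay vector rt, which rescales the previous state before the standard gates run (a decay mechanism reminiscent of GRU-D). A small sketch of the input split and decay, with made-up shapes:

import numpy as np

num_units = 4
inputs = np.random.rand(2, 10 + num_units)   # [real inputs + m | rt]
rt = inputs[:, -num_units:]                  # decay term, one value per unit
x = inputs[:, :-num_units]                   # real inputs + m, fed to the gates
state = np.ones((2, num_units))
decayed_state = rt * state                   # state used in the GRU update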
Example no. 10
    def call(self, inputs, state):
        def replace_w(x):
            if x.op.name.endswith('kernel'):
                return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
            else:
                return x

        with bit_utils.replace_variable(replace_w):
            if self._gate_linear is None:
                bias_ones = self._bias_initializer
                if self._bias_initializer is None:
                    bias_ones = tf.constant_initializer(
                        1.0, dtype=inputs.dtype)
                with tf.variable_scope("gates"):  # Reset gate and update gate.
                    self._gate_linear = rnn_cell_impl._Linear(
                        [inputs, state],
                        2 * self._num_units,
                        True,
                        bias_initializer=bias_ones,
                        kernel_initializer=self._kernel_initializer)

            value = tf.sigmoid(self._gate_linear([inputs, state]))
            r, u = tf.split(value=value, num_or_size_splits=2, axis=1)

            r_state = bit_utils.round_bit(r * state, bit=self._f_bit)
            if self._candidate_linear is None:
                with tf.variable_scope("candidate"):
                    self._candidate_linear = rnn_cell_impl._Linear(
                        [inputs, r_state],
                        self._num_units,
                        True,
                        bias_initializer=self._bias_initializer,
                        kernel_initializer=self._kernel_initializer)
            c = self._activation(self._candidate_linear([inputs, r_state]))
            c = bit_utils.round_bit(c, bit=self._f_bit)
            new_h = bit_utils.round_bit(
                u * state + (1 - u) * c, bit=self._f_bit)
        return new_h, new_h
Example no. 11
    def call(self, inputs, state):
        """Long short-term memory cell (LSTM).

        Args:
          inputs: `2-D` tensor with shape `[batch_size x input_size]`.
          state: An `LSTMStateTuple` of state tensors, each shaped
            `[batch_size x self.state_size]`, if `state_is_tuple` has been set to
            `True`.  Otherwise, a `Tensor` shaped
            `[batch_size x 2 * self.state_size]`.

        Returns:
          A pair containing the new hidden state, and the new state (either a
            `LSTMStateTuple` or a concatenated state, depending on
            `state_is_tuple`).
        """
        sigmoid = tf.sigmoid
        self._step = self._step + 1

        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

        if self._linear is None:
            self._linear = _Linear([inputs, h], 4 * self._num_units, True)

        i, j, f, o = tf.split(value=self._linear([inputs, h]),
                              num_or_size_splits=4,
                              axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h_cnt = self._activation(new_c) * sigmoid(o)

        if self._step % self._skip_size == 0:
            w_h_skip, b_h_skip = self.weight_bias(
                [self._num_units, self._num_units], [self._num_units])
            new_h_skip = sigmoid(tf.matmul(h, w_h_skip) + b_h_skip)
            masked_w1, masked_w2 = self.masked_weight(_load=False)
            new_h = new_h_cnt * masked_w1 + new_h_skip * masked_w2

        else:
            new_h = new_h_cnt

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat([new_c, new_h], 1)
        return new_h, new_state
Example no. 12
    def call(self, inputs, state):
        if self._gate_linear is None:
            with tf.variable_scope("gates"):
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    kernel_initializer=tf.orthogonal_initializer(1.0),
                    bias_initializer=tf.constant_initializer(1.0))
        value = tf.sigmoid(self._gate_linear([inputs, state]))
        r, u = tf.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state
        if self._candidate_linear is None:
            with tf.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    kernel_initializer=tf.orthogonal_initializer(1.0),
                    bias_initializer=tf.constant_initializer(-1.0))
        c = tf.nn.tanh(self._candidate_linear([inputs, r_state]))
        new_h = u * state + (1 - u) * c
        return new_h, new_h
Example no. 13
    def call(self, inputs, state):
        """
        Basic RNN: output = new_state = act(W * input + U * state + B).

        The implementation is similar to `BasicRNNCell.call()`
        in tensorflow/python/ops/rnn_cell_impl.py, but we pass
        the kernel initializer to `_Linear()`
        """
        if self._linear is None:
            self._linear = _Linear([inputs, state],
                                   self._num_units,
                                   build_bias=True,
                                   kernel_initializer=self._kernel_initializer)

        output = self._activation(self._linear([inputs, state]))
        return output, output
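This is the vanilla RNN update, output = new_state = activation(W [x, h] + b), with only the kernel initializer customised. A one-function NumPy equivalent, assuming tanh as the activation:

import numpy as np

def basic_rnn_step(x, h, w, b):
    # output = new_state = tanh(W @ [x, h] + b)
    return np.tanh(np.concatenate([x, h], axis=1) @ w + b)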
Example no. 14
    def call(self, inputs, state):
        """Long short-term memory cell (LSTM).
        Args:
            inputs: `2-D` tensor with shape `[batch_size x input_size]`.
            state: An `LSTMStateTuple` of state tensors, each shaped
                `[batch_size x self.state_size]`, if `state_is_tuple` has been set to
                `True`.  Otherwise, a `Tensor` shaped
                `[batch_size x 2 * self.state_size]`.
            Returns:
            A pair containing the new hidden state, and the new state (either a
                `LSTMStateTuple` or a concatenated state, depending on
                `state_is_tuple`).
        """
        sigmoid = tf.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

        if self._linear is None:
            self._linear = _Linear([inputs, h], 4 * self._num_units, True)
            if self.mode == tf.estimator.ModeKeys.TRAIN:
                mask = tf.ones_like(self._linear._weights)
                mask_1, mask_2 = tf.split(mask, num_or_size_splits=2, axis=1)
                mask_2 = tf.nn.dropout(
                    mask_2,
                    keep_prob=self.weight_keep_drop) * self.weight_keep_drop
                mask = tf.concat([mask_1, mask_2], axis=1)
                self._linear._weights = self._linear._weights * mask

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(value=self._linear([inputs, h]),
                              num_or_size_splits=4,
                              axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat([new_c, new_h], 1)
        return new_h, new_state
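Here dropout is applied once to the weights themselves (DropConnect-style), not to activations: the concatenated gate kernel is split in two along its output columns and random entries in the second half are zeroed, while multiplying by weight_keep_drop cancels tf.nn.dropout's rescaling so surviving weights keep their original values. A NumPy sketch of that masking step, with an assumed kernel shape:

import numpy as np

def mask_second_half(weights, keep_prob, rng=np.random):
    # weights: [input_size + num_units, 4 * num_units]; zero random entries in
    # the right half of the columns, leaving kept weights unscaled.
    half = weights.shape[1] // 2
    keep = rng.random_sample((weights.shape[0], weights.shape[1] - half)) < keep_prob
    masked = weights.copy()
    masked[:, half:] *= keep
    return masked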
Example no. 15
    def call(self, inputs, state):
        """Long short-term memory cell (LSTM).
		Args:
			inputs: `2-D` tensor with shape `[batch_size x input_size]`.
			state: An `LSTMStateTuple` of state tensors, each shaped
				`[batch_size x self.state_size]`, if `state_is_tuple` has been set to
				`True`.  Otherwise, a `Tensor` shaped
				`[batch_size x 2 * self.state_size]`.
		Returns:
			A pair containing the new hidden state, and the new state (either a
				`LSTMStateTuple` or a concatenated state, depending on
				`state_is_tuple`).
		"""
        sigmoid = math_ops.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        if self._linear is None:
            self._linear = _Linear([inputs, h], 4 * self._num_units, True)
            if self._state_keep_prob < 1.0:
                weights = self._linear._weights
                input_size = weights.get_shape().as_list()[0] - self._num_units
                input_weights, state_weights = array_ops.split(
                    weights, [input_size, self._num_units])
                state_weights = state_weights * self._mask_tensor
                self._linear._weights = array_ops.concat(
                    [input_weights, state_weights], 0)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=self._linear([inputs, h]),
                                     num_or_size_splits=4,
                                     axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
Example no. 16
    def call(self, inputs, state):
        sigmoid = math_ops.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        c, h, e = state

        if self._linear is None:
            self._linear = _Linear([inputs, h, e], 4 * self._num_units, True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=self._linear([inputs, h, e]),
                                     num_or_size_splits=4,
                                     axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        new_state = EncryptLSTMStateTuple(new_c, new_h, e)

        return new_h, new_state
Example no. 17
  def call(self, inputs, state, att_score=None):
    time_now_score = tf.expand_dims(inputs[:,-1], -1)
    time_last_score = tf.expand_dims(inputs[:,-2], -1)
    inputs = inputs[:,:-2]
    inputs = inputs * att_score
    num_proj = self._num_units if self._num_proj is None else self._num_proj
    sigmoid = math_ops.sigmoid

    if self._state_is_tuple:
      (c_prev, m_prev) = state
    else:
      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

    dtype = inputs.dtype
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
            
    if self._time_kernel_w1 is None:
      scope = vs.get_variable_scope()
      with vs.variable_scope(
          scope, initializer=self._initializer) as unit_scope:
        with vs.variable_scope(unit_scope):
          self._time_input_w1 = vs.get_variable(
              "_time_input_w1", shape=[self._num_units], dtype=dtype)
          self._time_input_bias1 = vs.get_variable(
              "_time_input_bias1", shape=[self._num_units], dtype=dtype)
          self._time_input_w2 = vs.get_variable(
              "_time_input_w2", shape=[self._num_units], dtype=dtype)
          self._time_input_bias2 = vs.get_variable(
              "_time_input_bias2", shape=[self._num_units], dtype=dtype)
          self._time_kernel_w1 = vs.get_variable(
              "_time_kernel_w1", shape=[input_size, self._num_units], dtype=dtype)
          self._time_kernel_t1 = vs.get_variable(
              "_time_kernel_t1", shape=[self._num_units, self._num_units], dtype=dtype)
          self._time_bias1 = vs.get_variable(
              "_time_bias1", shape=[self._num_units], dtype=dtype)
          self._time_kernel_w2 = vs.get_variable(
              "_time_kernel_w2", shape=[input_size, self._num_units], dtype=dtype)
          self._time_kernel_t2 = vs.get_variable(
              "_time_kernel_t2", shape=[self._num_units, self._num_units], dtype=dtype)
          self._time_bias2 = vs.get_variable(
              "_time_bias2", shape=[self._num_units], dtype=dtype)
          self._o_kernel_t1 = vs.get_variable(
              "_o_kernel_t1", shape=[self._num_units, self._num_units], dtype=dtype)    
          self._o_kernel_t2 = vs.get_variable(
              "_o_kernel_t2", shape=[self._num_units, self._num_units], dtype=dtype)  
                
    time_now_input = tf.nn.tanh(
        time_now_score * self._time_input_w1 + self._time_input_bias1)
    time_last_input = tf.nn.tanh(
        time_last_score * self._time_input_w2 + self._time_input_bias2)

    time_now_state = (math_ops.matmul(inputs, self._time_kernel_w1) +
                      math_ops.matmul(time_now_input, self._time_kernel_t1) +
                      self._time_bias1)
    time_last_state = (math_ops.matmul(inputs, self._time_kernel_w2) +
                       math_ops.matmul(time_last_input, self._time_kernel_t2) +
                       self._time_bias2)
    
    if self._linear1 is None:
      scope = vs.get_variable_scope()
      with vs.variable_scope(
          scope, initializer=self._initializer) as unit_scope:
        if self._num_unit_shards is not None:
          unit_scope.set_partitioner(
              partitioned_variables.fixed_size_partitioner(
                  self._num_unit_shards))
        self._linear1 = _Linear([inputs, m_prev], 4 * self._num_units, True)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    lstm_matrix = self._linear1([inputs, m_prev])
    i, j, f, o = array_ops.split(
        value=lstm_matrix, num_or_size_splits=4, axis=1)
    o = o + math_ops.matmul(time_now_input, self._o_kernel_t1) + math_ops.matmul(time_last_input, self._o_kernel_t2)   
    # Diagonal connections
    if self._use_peepholes and not self._w_f_diag:
      scope = vs.get_variable_scope()
      with vs.variable_scope(
          scope, initializer=self._initializer) as unit_scope:
        with vs.variable_scope(unit_scope):
          self._w_f_diag = vs.get_variable(
              "w_f_diag", shape=[self._num_units], dtype=dtype)
          self._w_i_diag = vs.get_variable(
              "w_i_diag", shape=[self._num_units], dtype=dtype)
          self._w_o_diag = vs.get_variable(
              "w_o_diag", shape=[self._num_units], dtype=dtype)

    if self._use_peepholes:
      c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * sigmoid(time_last_state) * c_prev +
           sigmoid(i + self._w_i_diag * c_prev) * sigmoid(time_now_state) * self._activation(j))
    else:
      c = (sigmoid(f + self._forget_bias) * sigmoid(time_last_state) * c_prev + sigmoid(i) * sigmoid(time_now_state) * self._activation(j))

    if self._cell_clip is not None:
      # pylint: disable=invalid-unary-operand-type
      c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
      # pylint: enable=invalid-unary-operand-type
    if self._use_peepholes:
      m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
    else:
      m = sigmoid(o) * self._activation(c)

    if self._num_proj is not None:
      if self._linear2 is None:
        scope = vs.get_variable_scope()
        with vs.variable_scope(scope, initializer=self._initializer):
          with vs.variable_scope("projection") as proj_scope:
            if self._num_proj_shards is not None:
              proj_scope.set_partitioner(
                  partitioned_variables.fixed_size_partitioner(
                      self._num_proj_shards))
            self._linear2 = _Linear(m, self._num_proj, False)
      m = self._linear2(m)

      if self._proj_clip is not None:
        # pylint: disable=invalid-unary-operand-type
        m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
        # pylint: enable=invalid-unary-operand-type

    new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else
                 array_ops.concat([c, m], 1))
    return m, new_state
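The distinctive part of this cell is that the forget and input paths are additionally gated by two time-dependent signals (time_last_state and time_now_state) derived from the two timestamp features stripped off the end of inputs. Isolating just the cell-state update without peepholes, a sketch assuming tanh as the activation:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def time_gated_cell_update(i, j, f, c_prev, t_now, t_last, forget_bias=1.0):
    # c = sigmoid(f + b) * sigmoid(t_last) * c_prev + sigmoid(i) * sigmoid(t_now) * tanh(j)
    return (sigmoid(f + forget_bias) * sigmoid(t_last) * c_prev
            + sigmoid(i) * sigmoid(t_now) * np.tanh(j))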
Example no. 18
    def call(self, inputs, state):

        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid

        (c_prev, m_prev) = state

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")
        if self._linear1 is None:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                if self._num_unit_shards is not None:
                    unit_scope.set_partitioner(
                        partitioned_variables.fixed_size_partitioner(
                            self._num_unit_shards))
                self._linear1 = _Linear([inputs, m_prev], 4 * self._num_units,
                                        True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        lstm_matrix = self._linear1([inputs, m_prev])

        i, j, f, o = array_ops.split(value=lstm_matrix,
                                     num_or_size_splits=4,
                                     axis=1)
        # Diagonal connections
        if self._use_peepholes and not self._w_f_diag:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                with vs.variable_scope(unit_scope):
                    self._w_f_diag = vs.get_variable("w_f_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_i_diag = vs.get_variable("w_i_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_o_diag = vs.get_variable("w_o_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)

        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) *
                 c_prev +
                 sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
        # pylint: enable=invalid-unary-operand-type
        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            if self._linear2 is None:
                scope = vs.get_variable_scope()
                with vs.variable_scope(scope, initializer=self._initializer):
                    with vs.variable_scope("projection") as proj_scope:
                        if self._num_proj_shards is not None:
                            proj_scope.set_partitioner(
                                partitioned_variables.fixed_size_partitioner(
                                    self._num_proj_shards))
                        self._linear2 = _Linear(m, self._num_proj, False)
            m = self._linear2(m)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat([c, m], 1))
        return m, new_state
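For reference, with peepholes enabled the gates also see the previous cell state through the diagonal weights w_*_diag, and the optional projection then maps m from num_units down to num_proj. A compact NumPy sketch of the peephole part of the update, given pre-activation gate values and assuming tanh as the activation:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def peephole_lstm_update(i, j, f, o, c_prev, w_i, w_f, w_o, forget_bias=1.0):
    c = (sigmoid(f + forget_bias + w_f * c_prev) * c_prev
         + sigmoid(i + w_i * c_prev) * np.tanh(j))
    m = sigmoid(o + w_o * c) * np.tanh(c)
    return c, m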