Example #1
    def __init__(self,
                 num_units,
                 gate_mod=None,
                 ngram=False,
                 no_feedback=False,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=None,
                 num_proj=None,
                 proj_clip=None,
                 num_unit_shards=None,
                 num_proj_shards=None,
                 forget_bias=1.0,
                 state_is_tuple=True,
                 layer_norm=False,
                 activation=None,
                 reuse=None,
                 name=None,
                 dtype=None,
                 **kwargs):

        super(LSTMCell_mod, self).__init__(_reuse=reuse,
                                           name=name,
                                           dtype=dtype,
                                           **kwargs)

        print("LSTM cell mode: {0}".format(gate_mod))

        # Inputs must be 2-dimensional.
        self.input_spec = base_layer.InputSpec(ndim=2)

        self._num_units = num_units
        self._gate_mod = gate_mod
        self._ngram = ngram
        self._no_feedback = no_feedback
        self._use_peepholes = use_peepholes
        self._cell_clip = cell_clip
        self._initializer = initializers.get(initializer)
        self._num_proj = num_proj
        self._proj_clip = proj_clip
        self._num_unit_shards = num_unit_shards
        self._num_proj_shards = num_proj_shards
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._layer_norm = layer_norm
        if activation:
            self._activation = activations.get(activation)
        else:
            self._activation = math_ops.tanh

        if num_proj:
            self._state_size = (LSTMStateTuple(num_units, num_proj)
                                if state_is_tuple else num_units + num_proj)
            self._output_size = num_proj
        else:
            self._state_size = (LSTMStateTuple(num_units, num_units)
                                if state_is_tuple else 2 * num_units)
            self._output_size = num_units
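A minimal construction sketch (hypothetical usage; it assumes the class exposes the usual state_size / output_size properties of TF 1.x LSTM cells):

    cell = LSTMCell_mod(num_units=128, num_proj=64)
    # With num_proj set: state_size == LSTMStateTuple(c=128, h=64),
    # output_size == 64.
    cell_plain = LSTMCell_mod(num_units=128)
    # Without it: state_size == LSTMStateTuple(c=128, h=128),
    # output_size == 128.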
Example #2
    def __init__(self,
                 num_units,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=None,
                 num_proj=None,
                 proj_clip=None,
                 num_unit_shards=None,
                 num_proj_shards=None,
                 forget_bias=1.0,
                 state_is_tuple=True,
                 activation=None,
                 reuse=None):

        super(ExtHighWayLSTMCell, self).__init__(_reuse=reuse)
        if not state_is_tuple:
            tf.logging.warn(
                "%s: Using a concatenated state is slower and will soon be "
                "deprecated.  Use state_is_tuple=True.", self)
        if num_unit_shards is not None or num_proj_shards is not None:
            tf.logging.warn(
                "%s: The num_unit_shards and proj_unit_shards parameters are "
                "deprecated and will be removed in Jan 2017.  "
                "Use a variable scope with a partitioner instead.", self)

        self._num_units = num_units
        self._use_peepholes = use_peepholes
        self._cell_clip = cell_clip
        self._initializer = initializer
        self._num_proj = num_proj
        self._proj_clip = proj_clip
        self._num_unit_shards = num_unit_shards
        self._num_proj_shards = num_proj_shards
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._activation = activation or math_ops.tanh

        if num_proj:
            self._state_size = (LSTMStateTuple(num_units, num_proj)
                                if state_is_tuple else num_units + num_proj)
            self._output_size = num_proj
        else:
            self._state_size = (LSTMStateTuple(num_units, num_units)
                                if state_is_tuple else 2 * num_units)
            self._output_size = num_units
        self._linear1 = None
        self._linear2 = None
        if self._use_peepholes:
            self._w_f_diag = None
            self._w_i_diag = None
            self._w_o_diag = None
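The deprecation warning above points to variable partitioners as the replacement for the shard arguments; a hedged sketch of that pattern with the TF 1.x API:

    # Instead of num_unit_shards=4, shard the kernel via the enclosing scope:
    with tf.variable_scope("rnn", partitioner=tf.fixed_size_partitioner(4)):
        cell = ExtHighWayLSTMCell(num_units=256)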
Example #3
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with _checked_scope(self, scope or "basic_lstm_cell", reuse=self._reuse):
        # Parameters of the gates are concatenated into one multiply for
        # efficiency (cf. "Recurrent Neural Network Regularization").
        if self._state_is_tuple:
            # The usual branch: unpack c_t and h_t from the state tuple.
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)
        concat = _linear([inputs, h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        # Note: the returned output is h_t, while the state carries (c, h).
        return new_h, new_state
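For reference, the split-and-recombine above is the standard LSTM update. In the code's notation (with $b_f$ the forget bias and tanh the default activation):

$$c_t = c_{t-1} \odot \sigma(f + b_f) + \sigma(i) \odot \tanh(j), \qquad h_t = \tanh(c_t) \odot \sigma(o)$$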
    def call(self, ginputs, state):
        """ Run one step of LSTM.
        """
        sigmoid = tf.sigmoid
        grad = tf.slice(ginputs, [0, 0], [-1, self._split_lo])
        inputs = tf.slice(ginputs, [0, self._split_lo], [-1, -1])
        (c_prev, m_prev) = state
        # input_size = inputs.get_shape().with_rank(2)[1]
        # if input_size.value is None:
        #     raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
        scope = tf.get_variable_scope()
        with tf.variable_scope(scope,
                               initializer=self._initializer) as unit_scope:
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            inputs_norm = batch_normalization(inputs, name_scope="lstm_inputs")
            m_prev_norm = batch_normalization(m_prev, name_scope="lstm_hidden")
            # NOTE: the normalized tensors above are computed but unused; the
            # matmul below consumes the raw inputs and m_prev.
            # lstm_matrix = _linear([inputs_norm, m_prev_norm], 4 * self._num_units, bias=True)
            lstm_matrix = math_ops.matmul(tf.concat(
                [inputs, m_prev], 1), self._kernel)
            lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias)
            i, j, f, o = tf.split(value=lstm_matrix,
                                  num_or_size_splits=4,
                                  axis=1)

            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))
            m = sigmoid(o) * self._activation(c)

        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else tf.concat([c, m], 1))

        output = tf.concat([grad, m], 1)

        return output, new_state
    def __init__(self,
                 num_units,
                 num_var=1,
                 split=1,
                 varepsilon=1e-24,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=None,
                 forget_bias=1.0,
                 state_is_tuple=True,
                 activation=None):

        super(AdamLSTMCell, self).__init__(num_units,
                                           use_peepholes=use_peepholes,
                                           cell_clip=cell_clip,
                                           initializer=initializer,
                                           forget_bias=forget_bias,
                                           state_is_tuple=state_is_tuple,
                                           activation=activation)
        self.rank = num_var
        self._split_lo = split
        # 1 + self.rank : momentum + variance
        self._state_size = LSTMStateTuple(num_units * (1 + self.rank),
                                          num_units * (1 + self.rank))
        self.eps = varepsilon
Example #6
def _default_dropout_state_filter_visitor(substate):
    if isinstance(substate, LSTMStateTuple):
        # Do not perform dropout on the memory state.
        return LSTMStateTuple(c=False, h=True)
    elif isinstance(substate, tensor_array_ops.TensorArray):
        return False
    return True
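A hedged usage sketch: this visitor is the kind of callable DropoutWrapper accepts as dropout_state_filter_visitor (TF 1.x), so state dropout touches h but never the memory c:

    cell = tf.nn.rnn_cell.DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(128),
        state_keep_prob=0.9,
        dropout_state_filter_visitor=_default_dropout_state_filter_visitor)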
Example #7
    def call(self, inputs, state):
        sigmoid = math_ops.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        # get context from encoder outputs
        context = self._simple_attention(self._encoder_vector,
                                         self._encoder_proj, h)

        if self._linear is None:
            self._linear = _Linear([inputs, context, h], 4 * self._num_units,
                                   True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=self._linear([inputs, context, h]),
                                     num_or_size_splits=4,
                                     axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
    def __call__(self, inputs, states):
        """this method is inheritated, and always calculate layer by layer"""
        sigmoid = tf.sigmoid
        if self._state_is_tuple:
            hs = ()
            for state in states:
                # c and h each have shape (batch_size, hidden_size).
                c, h = state
                # hs collects time_lag tensors of shape (batch_size, hidden_size).
                hs += (h,)
        else:
            hs = ()
            for state in states:
                c, h = array_ops.split(value=state,
                                       num_or_size_splits=2,
                                       axis=1)
                hs += (h, )

        meta_variable_size = 4 * self.output_size
        concat = BinaryMera_wavefn(inputs, hs, meta_variable_size,
                                   self._num_orders, self._virtual_dim, True)
        i, j, f, o = array_ops.split(value=concat,
                                     num_or_size_splits=4,
                                     axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
Example #9
    def call(self, inputs, state):
        char_inputs = inputs[0]
        state_inputs = inputs[1]

        check_state_0 = tf.reduce_sum(state_inputs, axis=-1)
        check_state_1 = tf.reduce_sum(check_state_0, axis=-1)
        state_inputs_indices_for_lexicon = tf.where(
            tf.not_equal(check_state_0, 0))
        state_inputs_indices_for_not_lexicon = tf.squeeze(
            tf.where(tf.equal(check_state_1, 0)))

        state_inputs_indices_for_not_lexicon = tf.cond(
            pred=tf.equal(tf.rank(state_inputs_indices_for_not_lexicon), 0),
            true_fn=lambda: tf.expand_dims(
                state_inputs_indices_for_not_lexicon, axis=0),
            false_fn=lambda: state_inputs_indices_for_not_lexicon)

        char_inputs_indices_for_lexicon = tf.where(
            tf.not_equal(tf.reduce_sum(check_state_0, axis=-1), 0))
        char_inputs_indices_for_not_lexicon = tf.where(
            tf.equal(tf.reduce_sum(check_state_0, axis=-1), 0))

        if self._state_is_tuple:
            c, h = state
        else:
            c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

        gate_inputs = tf.matmul(tf.concat([char_inputs, h], 1), self._kernel)
        gate_inputs = tf.nn.bias_add(gate_inputs, self._bias)

        i, j, f, o = tf.split(value=gate_inputs, num_or_size_splits=4, axis=1)

        new_c_without_lexicon = self._new_c_without_lexicon(
            i=i,
            f=f,
            j=j,
            c=c,
            indices_tensor=state_inputs_indices_for_not_lexicon)
        new_c = tf.scatter_nd_update(
            self._char_state_tensor,
            indices=char_inputs_indices_for_not_lexicon,
            updates=new_c_without_lexicon)

        new_c = tf.cond(tf.not_equal(
            tf.shape(state_inputs_indices_for_not_lexicon)[-1],
            tf.shape(state_inputs)[0]),
                        true_fn=lambda: self._if_not_empty_lexicon_state(
                            i, j, char_inputs, state_inputs,
                            char_inputs_indices_for_lexicon,
                            state_inputs_indices_for_lexicon, new_c),
                        false_fn=lambda: new_c)

        new_h = tf.multiply(self._activation(new_c), tf.nn.sigmoid(o))

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat([new_c, new_h], 1)

        return new_h, new_state
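The index bookkeeping above hinges on tf.where returning the coordinates of nonzero rows; a toy illustration with made-up values (TF 1.x):

    import tensorflow as tf

    state_inputs = tf.constant([[0., 0.], [3., 0.], [0., 0.]])
    row_sums = tf.reduce_sum(state_inputs, axis=-1)
    lexicon_rows = tf.where(tf.not_equal(row_sums, 0))        # -> [[1]]
    plain_rows = tf.squeeze(tf.where(tf.equal(row_sums, 0)))  # -> [0 2]
    with tf.Session() as sess:
        print(sess.run([lexicon_rows, plain_rows]))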
Example #10
    def call(self, inputs, state):

        sigmoid = math_ops.sigmoid
        tanh = math_ops.tanh
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        # Time difference between events (the input is assumed float-typed).
        delt_t = array_ops.slice(inputs, [0, 0], [-1, 1])
        # The remaining 128 columns hold the text vector.
        text = array_ops.slice(inputs, [0, 1], [-1, 128])
        concat_time = _linear([text, h], 3 * self.num_units,
                              bias=True)  # time-decay part
        concat_text = _linear([text, h], 3 * self.num_units, bias=True)  # text part
        output = _linear([text, h], self.num_units, bias=True)

        i0, j0, f0 = array_ops.split(value=concat_time,
                                     num_or_size_splits=3,
                                     axis=1)  # time-decay part
        i1, j1, f1 = array_ops.split(value=concat_text,
                                     num_or_size_splits=3,
                                     axis=1)  # text part

        new_c = c * math_ops.exp(
            -1 * delt_t) * sigmoid(f0 + self._forget_bias) + (
                1 - math_ops.exp(-1 * delt_t)) * sigmoid(i0) * tanh(j0)
        new_c = new_c * sigmoid(f1 +
                                self._forget_bias) + sigmoid(i1) * tanh(j1)
        new_h = tanh(new_c) * sigmoid(output)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        # RNNCell.call is expected to return (output, state).
        return new_h, new_state
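Written out, the two-stage update applies an exponential time decay and then a text gate (with $b_f$ the forget bias):

$$\tilde{c}_t = c_{t-1}\, e^{-\Delta t}\, \sigma(f_0 + b_f) + (1 - e^{-\Delta t})\, \sigma(i_0)\, \tanh(j_0)$$
$$c_t = \tilde{c}_t\, \sigma(f_1 + b_f) + \sigma(i_1)\, \tanh(j_1), \qquad h_t = \tanh(c_t)\, \sigma(o)$$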
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                one = constant_op.constant(1, dtype=dtypes.int32)
                c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)  # i.e. tf.split(state, 2, axis=1)
            concat = self.stochastic_linear([inputs, h], 4 * self._num_units, True)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = tf.split(concat, 4, axis=1)
            if self._layer_norm:
                i = self._norm(i, "input", dtype=inputs.dtype)
                j = self._norm(j, "transform", dtype=inputs.dtype)
                f = self._norm(f, "forget", dtype=inputs.dtype)
                o = self._norm(o, "output", dtype=inputs.dtype)

            new_c = (c * tf.sigmoid(f + self._forget_bias) +
                     tf.sigmoid(i) * tf.nn.tanh(j))
            new_h = tf.nn.tanh(new_c) * tf.sigmoid(o)

            if self._state_is_tuple:
                new_state = LSTMStateTuple(new_c, new_h)
            else:
                new_state = tf.concat([new_c, new_h], axis=1)
            return new_h, new_state
    def call(self, inputs, state):
        """Long short-term memory cell (LSTM)."""
        sigmoid = math_ops.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        concat = self._line_sep([inputs, h], 4 * self._num_units, bias=False)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=concat,
                                     num_or_size_splits=4,
                                     axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        bn_new_c = self.layer_norm(new_c, scope='c')
        new_h = self._activation(bn_new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
Example #13
    def __call__(self, inputs, states):
        """Now we have multiple states, state->states"""
        sigmoid = tf.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            hs = ()
            for state in states:
                # every state is a tuple of (c,h)
                c, h = state
                hs += (h, )
        else:
            hs = ()
            for state in states:
                c, h = array_ops.split(value=state,
                                       num_or_size_splits=2,
                                       axis=1)
                hs += (h, )

        output_size = 4 * self._num_units
        concat = tensor_network_linear(inputs, hs, output_size, True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=concat,
                                     num_or_size_splits=4,
                                     axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
def get_cost_l(encoder_embed_input,
               decoder_embed_input,
               l_y,
               decoder_y,
               target_sequence_length,
               max_target_sequence_length,
               reuse=False):
    encode_outputs, encode_states, z_mean, z_stddev, new_states = encoder(
        encoder_embed_input, l_y, keep_prob, reuse)
    samples = tf.random_normal(tf.shape(z_stddev))
    z = z_mean + tf.exp(z_stddev * 0.5) * samples
    h_state = tf.nn.softplus(tf.matmul(z, weights_de['w_']) + biases_de['b_'])
    #c_state = tf.nn.softplus(tf.matmul(z, weights_de['w_2']) + biases_de['b_2'])
    decoder_initial_state = LSTMStateTuple(h_state, encode_states[1])
    decoder_output, predicting_logits, training_logits, masks, target = decoder(
        decoder_embed_input, decoder_y, target_sequence_length,
        max_target_sequence_length, decoder_initial_state, keep_prob, reuse)
    #KL term-------------
    latent_loss = 0.5 * tf.reduce_sum(
        tf.exp(z_stddev) - 1. - z_stddev + tf.square(z_mean), 1)
    latent_cost = tf.reduce_mean(latent_loss)

    encropy_loss = tf.contrib.seq2seq.sequence_loss(training_logits, target,
                                                    masks)  #/batch_size

    cost = tf.reduce_mean(encropy_loss + latentscale_iter * (latent_loss))

    return cost, encropy_loss, latent_cost, training_logits
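The latent_loss line is the closed-form KL divergence between the approximate posterior $\mathcal{N}(\mu, \operatorname{diag}(e^{s}))$ and the standard normal prior, with z_stddev holding the log-variance $s$:

$$D_{\mathrm{KL}} = \frac{1}{2} \sum_j \left( e^{s_j} + \mu_j^2 - 1 - s_j \right)$$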
Example #15
 def build_encoder_bi(self,
                      encoder_rnn_layer_size,
                      encoder_num_units,
                      encoder_cell_type="LSTM"):
     encoder_cell_type = encoder_cell_type.lower()
     # Define the encoder's RNN layers.
     with tf.name_scope("encoder"):
         fw_rnn_layer = self.get_rnn_layer(encoder_rnn_layer_size,
                                           encoder_num_units,
                                           encoder_cell_type)
         bw_rnn_layer = self.get_rnn_layer(encoder_rnn_layer_size,
                                           encoder_num_units,
                                           encoder_cell_type)
         # Unroll the bidirectional RNN.
         # bi_state structure: (fw_state, bw_state), where
         #     fw_state = ((c, h), (c, h), ...)
         #     bw_state = ((c, h), (c, h), ...)
         bi_outputs, bi_state = tf.nn.bidirectional_dynamic_rnn(
             fw_rnn_layer,
             bw_rnn_layer,
             self.embedded_src_batch,
             sequence_length=self.src_batch_seq_len,
             time_major=False,
             dtype=tf.float32)
         # Concatenate the forward and backward outputs on the last axis,
         # e.g. fw [128, 10, 100] + bw [128, 10, 100] -> [128, 10, 200].
         # This means the encoder output no longer matches the decoder input
         # size; either halve the encoder's num_units or double the decoder's.
         encoder_outputs = tf.concat(bi_outputs, -1)
         # bi_state likewise has fw and bw parts. Two ways to merge them:
         #   (1) mirror the output concat and join c and h on the last axis;
         #       the result must stay a special tuple type (LSTMStateTuple);
         #   (2) stack the fw and bw states, which doubles the cell count and
         #       requires adjusting the encoder or decoder accordingly.
         # Cell types also differ: LSTM carries c and h, GRU a single value.
         fw_encoder_state = bi_state[0]
         bw_encoder_state = bi_state[1]
         encoder_states = []
         if encoder_cell_type == "lstm" or encoder_cell_type == "basiclstm":
             # i iterates over the layers (cells).
             for i in range(encoder_rnn_layer_size):
                 # Join the current layer's fw and bw c and h.
                 c = tf.concat(
                     [fw_encoder_state[i][0], bw_encoder_state[i][0]], -1)
                 h = tf.concat(
                     [fw_encoder_state[i][1], bw_encoder_state[i][1]], -1)
                 encoder_states.append(LSTMStateTuple(c, h))
         else:  # GRU
             # Each layer's state is a single tensor.
             for i in range(encoder_rnn_layer_size):
                 state = tf.concat(
                     [fw_encoder_state[i], bw_encoder_state[i]], -1)
                 encoder_states.append(state)
         encoder_states = tuple(encoder_states)
     print("bidirectional encoder-encoder_outputs:", encoder_outputs)
     print("bidirectional encoder-encoder_states:", encoder_states)
     return encoder_outputs, encoder_states
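A hedged sketch of the sizing fix described in the comments: because c and h are concatenated, the decoder cells must be twice as wide as the encoder's (the names below are assumed):

    decoder_cell = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.LSTMCell(2 * encoder_num_units)
         for _ in range(encoder_rnn_layer_size)])
    # encoder_states returned by build_encoder_bi can then seed this decoder
    # directly as its initial state.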
Example #16
    def call(self, inputs, state):
        sigmoid = math_ops.sigmoid
        c, h = state
        gate_inputs = math_ops.matmul(array_ops.concat([inputs, h], 1),
                                      self._kernel)
        gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

        master_f_gate = self.cummax(gate_inputs[:, :self._levels])
        master_f_gate = array_ops.expand_dims(master_f_gate, -1)
        master_i_gate = self.cummax(gate_inputs[:,
                                                self._levels:self._levels * 2],
                                    reversed=True)
        master_i_gate = array_ops.expand_dims(master_i_gate, -1)
        f, i, o, j = array_ops.split(value=gate_inputs[:, self._levels * 2:],
                                     num_or_size_splits=4,
                                     axis=1)
        c_last = array_ops.reshape(c, [-1, self._levels, self._chunk_size])
        overlap = master_f_gate * master_i_gate
        c_out = overlap * (sigmoid(f) * c_last + sigmoid(i) * c) + \
            (master_f_gate - overlap) * c_last + \
            (master_i_gate - overlap) * self._activation(j)
        h_out = sigmoid(o) * self._activation(c_out)
        new_c = array_ops.reshape(c_out, [-1, self._num_units])
        new_h = array_ops.reshape(h_out, [-1, self._num_units])

        new_state = LSTMStateTuple(new_c, new_h)
        return new_h, new_state
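self.cummax is not shown in this snippet; in ON-LSTM-style cells a monotone "master gate" is typically built as the cumulative sum of a softmax. A sketch of one such implementation, offered as an assumption rather than the source's code:

    def cummax(logits, reversed=False):
        # Cumsum of a softmax rises monotonically from ~0 to 1 along the
        # level axis; reversed=True flips the direction of the ramp.
        probs = tf.nn.softmax(logits, axis=-1)
        if reversed:
            probs = tf.reverse(probs, axis=[-1])
        gates = tf.cumsum(probs, axis=-1)
        return tf.reverse(gates, axis=[-1]) if reversed else gates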
Example #17
    def call(self, inputs, state):
        """Long short-term memory cell (LSTM).
        Args:
          inputs: `2-D` tensor with shape `[batch_size, input_size]`.
          state: An `LSTMStateTuple` of state tensors, each shaped
            `[batch_size, self.state_size]`, if `state_is_tuple` has been set to
            `True`.  Otherwise, a `Tensor` shaped
            `[batch_size, 2 * self.state_size]`.
        Returns:
          A pair containing the new hidden state, and the new state (either a
            `LSTMStateTuple` or a concatenated state, depending on
            `state_is_tuple`).
        """

        B = self._block_size
        # print('state_size')
        # print(state.get_shape().as_list())
        sigmoid = math_ops.sigmoid
        one = constant_op.constant(1, dtype=dtypes.int32)
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)

        #gate_inputs = math_ops.matmul(
        #    array_ops.concat([inputs, h], 1), self._kernel)

        gate_inputs = BH_dense(inputs,
                               4 * self._num_units,
                               B,
                               self.transform,
                               kernel_weights=self._kernel)
        # gate_inputs = BH_matmul(
        #    array_ops.concat([inputs, h], 1), self._kernel, B, "Fourier")
        gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=gate_inputs,
                                     num_or_size_splits=4,
                                     axis=one)

        forget_bias_tensor = constant_op.constant(self._forget_bias,
                                                  dtype=f.dtype)
        # Note that using `add` and `multiply` instead of `+` and `*` gives a
        # performance improvement. So using those at the cost of readability.
        add = math_ops.add
        multiply = math_ops.multiply
        #multiply = Circ_matmul()
        new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))),
                    multiply(sigmoid(i), self._activation(j)))
        new_h = multiply(self._activation(new_c), sigmoid(o))
        new_h = bit_utils.round_bit(new_h, self._f_bit)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
def _default_dropout_state_filter_visitor(substate):
    from tensorflow.python.ops.rnn_cell_impl import LSTMStateTuple  # pylint: disable=g-import-not-at-top
    if isinstance(substate, LSTMStateTuple):
        # Do not perform dropout on the memory state.
        return LSTMStateTuple(c=False, h=True)
    elif isinstance(substate, tensor_array_ops.TensorArray):
        return False
    return True
def get_cost_l(encoder_embed_input,
               decoder_embed_input,
               l_y,
               decoder_y,
               target_sequence_length,
               max_target_sequence_length,
               reuse=False):
    encode_outputs, encode_states, z_mean, z_stddev, new_states = encoder(
        encoder_embed_input, l_y, keep_prob, reuse)
    samples = tf.random_normal(tf.shape(z_stddev))
    z = z_mean + tf.exp(z_stddev * 0.5) * samples
    h_state = tf.nn.softplus(tf.matmul(z, weights_de['w_']) + biases_de['b_'])
    #c_state = tf.nn.softplus(tf.matmul(z, weights_de['w_2']) + biases_de['b_2'])
    decoder_initial_state = LSTMStateTuple(h_state, encode_states[1])
    decoder_output, predicting_logits, training_logits, masks, target = decoder(
        decoder_embed_input, decoder_y, target_sequence_length,
        max_target_sequence_length, decoder_initial_state, keep_prob, reuse)
    #KL term-------------
    latent_loss = 0.5 * tf.reduce_sum(
        tf.exp(z_stddev) - 1. - z_stddev + tf.square(z_mean), 1)
    latent_cost = tf.reduce_mean(latent_loss)
    # Reconstruction term: squared error between the logits and the embedded
    # decoder input (this variant replaces the sequence_loss used above).
    decoder_input = tf.nn.embedding_lookup(dic_embeddings, decoder_embed_input)
    s_loss = tf.square(training_logits - decoder_input)
    mask_loss = tf.reduce_sum(tf.transpose(s_loss, [2, 0, 1]), 0)
    encropy_loss = tf.reduce_mean(tf.multiply(mask_loss, masks), 1)

    cost = tf.add(encropy_loss, latentscale_iter * latent_loss)

    return cost, encropy_loss, latent_cost, training_logits
Example #20
    def build_graph(self):
        """
        builds the computational graph that performs a step-by-step evaluation
        of the input data batches
        """

        self.unpacked_input_data = utility.unpack_into_tensorarray(
            self.input_data, 1, self.sequence_length)

        outputs = tf.TensorArray(tf.float32, self.sequence_length)
        free_gates = tf.TensorArray(tf.float32, self.sequence_length)
        allocation_gates = tf.TensorArray(tf.float32, self.sequence_length)
        write_gates = tf.TensorArray(tf.float32, self.sequence_length)
        read_weightings = tf.TensorArray(tf.float32, self.sequence_length)
        write_weightings = tf.TensorArray(tf.float32, self.sequence_length)
        usage_vectors = tf.TensorArray(tf.float32, self.sequence_length)

        controller_state = (self.controller.get_state()
                            if self.controller.has_recurrent_nn
                            else (tf.zeros(1), tf.zeros(1)))
        memory_state = self.memory.init_memory()
        if not isinstance(controller_state, LSTMStateTuple):
            controller_state = LSTMStateTuple(controller_state[0],
                                              controller_state[1])
        final_results = None

        with tf.variable_scope("sequence_loop") as scope:
            time = tf.constant(0, dtype=tf.int32)

            final_results = tf.while_loop(
                cond=lambda time, *_: time < self.sequence_length,
                body=self._loop_body,
                loop_vars=(time, memory_state, outputs, free_gates,
                           allocation_gates, write_gates, read_weightings,
                           write_weightings, usage_vectors, controller_state),
                parallel_iterations=32,
                swap_memory=True)

        dependencies = []
        if self.controller.has_recurrent_nn:
            dependencies.append(self.controller.update_state(final_results[9]))

        with tf.control_dependencies(dependencies):
            self.packed_output = utility.pack_into_tensor(final_results[2],
                                                          axis=1)
            self.packed_memory_view = {
                'free_gates':
                utility.pack_into_tensor(final_results[3], axis=1),
                'allocation_gates':
                utility.pack_into_tensor(final_results[4], axis=1),
                'write_gates':
                utility.pack_into_tensor(final_results[5], axis=1),
                'read_weightings':
                utility.pack_into_tensor(final_results[6], axis=1),
                'write_weightings':
                utility.pack_into_tensor(final_results[7], axis=1),
                'usage_vectors':
                utility.pack_into_tensor(final_results[8], axis=1)
            }
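The ten-variable tf.while_loop above relies on the loop_vars contract: the body must return the variables in the same order and structure it received them. A toy reduction showing the same pattern with a TensorArray (TF 1.x):

    import tensorflow as tf

    t0 = tf.constant(0)
    acc0 = tf.TensorArray(tf.float32, size=3)

    def body(t, acc):
        return t + 1, acc.write(t, tf.cast(t, tf.float32) * 2.0)

    _, acc = tf.while_loop(lambda t, _: t < 3, body, (t0, acc0))
    with tf.Session() as sess:
        print(sess.run(acc.stack()))  # [0. 2. 4.]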
Example #21
  def call(self, inputs, state):
    """Long short-term memory cell with attention (LSTMA)."""
    if self._state_is_tuple:
      state, history = state

    cell_output, new_state = self._cell(inputs, state)
    output = cell_output
    c_new, h_new = new_state

    # Embed the current output as a label embedding.
    label_emb = tf.nn.relu(tf.matmul(output, self.emb_M3))

    # Shift the history window: drop the oldest entry, append the new one.
    pre_history = tf.reshape(
        history,
        shape=[-1, self.config.use_K_histroy, self.config.label_emb_size])
    new_history = tf.slice(
        pre_history, [0, 1, 0],
        [-1, self.config.use_K_histroy - 1, self.config.label_emb_size])
    concat_his = tf.concat([new_history, tf.expand_dims(label_emb, axis=1)],
                           axis=1)

    # Fold the new cell state into the history to form the next c.
    concat_all = tf.concat([concat_his, tf.expand_dims(c_new, axis=1)], axis=1)
    concat_all_flatten = tf.reshape(
        concat_all,
        shape=[-1, (self.config.use_K_histroy + 1) * self.config.label_emb_size])
    concat_his_flatten = tf.reshape(
        concat_his,
        shape=[-1, self.config.use_K_histroy * self.config.label_emb_size])

    c = tf.nn.relu(tf.matmul(concat_all_flatten, self.emb_M4k))

    new_state = LSTMStateTuple(c, h_new)
    new_wrapper_state = (new_state, concat_his_flatten)

    return output, new_wrapper_state
def autoencoder_seq(x: tf.Tensor, noise, initial_state, seq_len, n_joints,
                    lstm_size):
    """
	:param x: Tensor of shape [BATCH_SIZE, MOTION_SELECTION]
	:return: Tuple of Tensors of shapes
	 	( [BATCH_SIZE, MOTION_SELECTION] , [BATCH_SIZE, SEQ_LEN, N_JOINTS] )
	"""
    motion_selection = x.shape[1].value
    with tf.variable_scope('encoder'):
        state_predictions, final_predictor_state = encoder(
            x, noise, LSTMStateTuple(*initial_state[0]), seq_len, n_joints,
            motion_selection)
    with tf.variable_scope('decoder'):
        predicted_motion_selection, final_classifier_state = decoder(
            state_predictions, LSTMStateTuple(*initial_state[1]), n_joints,
            motion_selection)
    return predicted_motion_selection, state_predictions, (
        final_predictor_state, final_classifier_state)
    def __init__(self, state_space_size, action_space_size, scope, trainer):
        with tf.variable_scope(scope):
            # Input
            self.inputs = tf.placeholder(shape=[None, state_space_size], dtype=tf.float32)

            # Recurrent network for temporal dependencies
            lstm_cell = BasicLSTMCell(256, state_is_tuple=True)
            c_init = np.zeros((1, lstm_cell.state_size.c), dtype=np.float32)
            h_init = np.zeros((1, lstm_cell.state_size.h), dtype=np.float32)
            self.state_init = [c_init, h_init]
            c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
            h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
            self.state_in = (c_in, h_in)
            state_in = LSTMStateTuple(c_in, h_in)
            # dynamic_rnn expects [batch, time, depth]; treat the incoming
            # batch of states as one sequence of length N with batch size 1.
            rnn_in = tf.expand_dims(self.inputs, [0])
            step_size = tf.shape(self.inputs)[:1]
            lstm_outputs, lstm_state = tf.nn.dynamic_rnn(lstm_cell, rnn_in,
                                                         initial_state=state_in,
                                                         sequence_length=step_size,
                                                         time_major=False)
            lstm_c, lstm_h = lstm_state
            self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
            rnn_out = tf.reshape(lstm_outputs, [-1, 256])

            # Output layers for policy and value estimations
            self.policy = slim.fully_connected(rnn_out, action_space_size,
                                               activation_fn=tf.nn.softmax,
                                               weights_initializer=normalized_columns_initializer(0.01),
                                               biases_initializer=None)
            self.value = slim.fully_connected(rnn_out, 1,
                                              activation_fn=None,
                                              weights_initializer=normalized_columns_initializer(1.0),
                                              biases_initializer=None)

            # Only the worker network need ops for loss functions and gradient updating.
            if scope != 'global':
                self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
                self.actions_onehot = tf.one_hot(self.actions, action_space_size, dtype=tf.float32)
                self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
                self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

                self.responsible_outputs = tf.reduce_sum(self.policy * self.actions_onehot, [1])

                # Loss functions
                self.value_loss = 0.5 * tf.reduce_sum(tf.square(self.target_v - tf.reshape(self.value, [-1])))
                self.entropy = - tf.reduce_sum(self.policy * tf.log(self.policy))
                self.policy_loss = -tf.reduce_sum(tf.log(self.responsible_outputs) * self.advantages)
                self.loss = 0.5 * self.value_loss + self.policy_loss - self.entropy * 0.01

                # Get gradients from local network using local losses
                local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
                self.gradients = tf.gradients(self.loss, local_vars)
                self.var_norms = tf.global_norm(local_vars)
                grads, self.grad_norms = tf.clip_by_global_norm(self.gradients, 40.0)

                # Apply local gradients to global network
                global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
                self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))
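A hedged rollout sketch showing how state_init / state_in / state_out are meant to be threaded across steps (net, sess and obs are assumed names, not from the source):

    rnn_state = net.state_init
    a_dist, v, rnn_state = sess.run(
        [net.policy, net.value, net.state_out],
        feed_dict={net.inputs: [obs],
                   net.state_in[0]: rnn_state[0],
                   net.state_in[1]: rnn_state[1]})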
Example #24
def decoder(decoder_embed_input,
            decoder_y,
            target_length,
            max_target_length,
            l_z,
            l_y,
            states,
            keep_prob,
            reuse=False):
    with tf.variable_scope("decoder", reuse=reuse):
        #l_y = y_scale*l_y
        l_yz = tf.concat([l_z, l_y], 1)
        u_mean = tf.contrib.layers.fully_connected(inputs=l_yz,
                                                   num_outputs=a_size,
                                                   activation_fn=None,
                                                   scope="u_mean")
        u_stddev = tf.contrib.layers.fully_connected(inputs=l_yz,
                                                     num_outputs=a_size,
                                                     activation_fn=None,
                                                     scope="u_std")
        samples = tf.random_normal(tf.shape(u_stddev))
        l_u = u_mean + tf.exp(u_stddev * 0.5) * samples

        l_yzu = tf.concat([l_yz, l_u, l_y], 1)
        h_states = tf.nn.softplus(
            tf.matmul(l_yzu, weights_de['w_']) + biases_de['b_'])
        decoder_initial_state = LSTMStateTuple(states[0], h_states)  #(C,H)

        decode_lstm = tf.contrib.rnn.LSTMCell(n_hidden,
                                              forget_bias=1.0,
                                              state_is_tuple=True)
        decode_cell = tf.contrib.rnn.DropoutWrapper(decode_lstm,
                                                    output_keep_prob=keep_prob)
        output_layer = Dense(n_input)  #TOTAL_SIZE
        decoder_input_ = tf.concat([
            tf.fill([batch_size, 1], vocab_to_int['<GO>']), decoder_embed_input
        ], 1)  # prepend one <GO> token to each target sequence
        decoder_input = tf.nn.embedding_lookup(dic_embeddings, decoder_input_)
        decoder_input = tf.concat([decoder_input, decoder_y],
                                  2)  # token embedding concatenated with one-hot y
        # # input_=tf.transpose(decoder_input,[1,0,2])
        training_helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoder_input, sequence_length=target_length)
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
            decode_cell, training_helper, decoder_initial_state, output_layer)
        output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            training_decoder,
            impute_finished=True,
            maximum_iterations=max_target_length)
        predicting_logits = tf.identity(output.sample_id, name='predictions')
        training_logits = tf.identity(output.rnn_output, 'logits')
        masks = tf.sequence_mask(target_length,
                                 max_target_length,
                                 dtype=tf.float32,
                                 name='masks')  #(batch_size,max_target_length)
        return training_logits, masks, u_mean, u_stddev
    def call(self, inputs, state):
        """
        Args:
          inputs: `2-D` tensor with shape `[batch_size, input_size]`.
          state: An `LSTMStateTuple` of state tensors, each shaped
            `[batch_size, num_units]`, if `state_is_tuple` has been set to
            `True`.  Otherwise, a `Tensor` shaped
            `[batch_size, 2 * num_units]`.

        Returns:
          A pair containing the new hidden state, and the new state (either a
            `LSTMStateTuple` or a concatenated state, depending on
            `state_is_tuple`).
        """
        c, h = state  # c and h both have shape (b, mem_slots * mem_size)

        h_mat = tf.reshape(h, [-1, self._mem_slots, self._mem_size])
        inputs_mat = tf.reshape(inputs, [-1, self._mem_slots, self._mem_size])
        input_plus_h = array_ops.concat([inputs_mat, h_mat],
                                        2)  # (b, slots, 2*mem_size)
        # gate_inputs = math_ops.matmul(input_plus_h, self._kernel)  # (b, 2*units) * (2*units, 3)
        gate_inputs = tf.tensordot(
            input_plus_h, self._kernel,
            axes=[[2], [0]])  # (b, slots, 2*mem_size) (2*mem_size, 3)
        gate_inputs = nn_ops.bias_add(gate_inputs,
                                      self._bias)  # (b, slots, 3)

        # vector -> matrix as initial state is vector
        mem_mat = tf.reshape(c, [-1, self._mem_slots, self._mem_size])
        att_mem_mat = self._attend_over_memory(mem_mat, inputs)
        # att_mem = tf.layers.flatten(att_mem_mat)

        # i = input_gate, f = forget_gate, o = output_gate
        i, f, o = array_ops.split(value=gate_inputs,
                                  num_or_size_splits=self.gate_num,
                                  axis=2)
        # print(i.get_shape(), "i")  # (b, slots, 1)

        forget_bias_tensor = constant_op.constant(self._forget_bias,
                                                  dtype=f.dtype)
        sigmoid = math_ops.sigmoid
        add = math_ops.add
        multiply = math_ops.multiply

        c_mat = tf.reshape(c, [-1, self._mem_slots, self._mem_size])
        new_c = add(multiply(c_mat, sigmoid(add(f, forget_bias_tensor))),
                    multiply(sigmoid(i), self._activation(att_mem_mat)))
        new_h = multiply(self._activation(new_c), sigmoid(o))

        # matrix -> vector
        new_c = tf.layers.flatten(new_c)
        new_h = tf.layers.flatten(new_h)

        new_state = LSTMStateTuple(new_c, new_h)
        return new_h, new_state
    def __init__(self,
                 num_units,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=None,
                 num_proj=None,
                 proj_clip=None,
                 num_unit_shards=None,
                 num_proj_shards=None,
                 forget_bias=1.0,
                 state_is_tuple=True,
                 activation=None,
                 reuse=None):

        super(CustomLSTMCell, self).__init__(_reuse=reuse)

        self._num_units = num_units
        self._use_peepholes = use_peepholes
        self._cell_clip = cell_clip
        self._initializer = initializer
        self._num_proj = num_proj
        self._proj_clip = proj_clip
        self._num_unit_shards = num_unit_shards
        self._num_proj_shards = num_proj_shards
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._activation = activation or math_ops.tanh

        if num_proj:
            self._state_size = (LSTMStateTuple(num_units, num_proj)
                                if state_is_tuple else num_units + num_proj)
            self._output_size = num_proj
        else:
            self._state_size = (LSTMStateTuple(num_units, num_units)
                                if state_is_tuple else 2 * num_units)
            self._output_size = num_units
        self._linear1 = None
        self._linear2 = None
        if self._use_peepholes:
            self._w_f_diag = None
            self._w_i_diag = None
            self._w_o_diag = None
Example #27
  def call(self, inputs, state):
      """Long short-term memory cell with attention (LSTMA)."""
      if self._state_is_tuple:
        state, hisTrack = state
        states, attns, attn_states = state

      cell_output, new_state = self._cell(inputs, state)

      new_state, _, _ = new_state
      c_prev, m_prev = new_state
      m_prev = tf.expand_dims(m_prev, axis=1)

      # Embed the cell output as a label embedding.
      label_emb = tf.nn.relu(tf.matmul(cell_output, self.emb_M3))
      label_emb = tf.expand_dims(label_emb, axis=1)

      # Shift the history track: drop the oldest entry, append the new one.
      hisTrack = tf.reshape(
          hisTrack,
          shape=[-1, self.config.use_K_histroy, self.config.label_emb_size])
      new_hisTrack = tf.slice(
          hisTrack, [0, 1, 0],
          [-1, self.config.use_K_histroy - 1, self.config.label_emb_size])
      concat_hisTrack = tf.concat([new_hisTrack, label_emb], axis=1)

      concat_all = tf.concat([concat_hisTrack, m_prev], axis=1)
      concat_all_flatten = tf.reshape(
          concat_all,
          shape=[-1, (self.config.use_K_histroy + 1) * self.config.label_emb_size])
      concat_hisTrack_flatten = tf.reshape(
          concat_hisTrack,
          shape=[-1, self.config.use_K_histroy * self.config.label_emb_size])

      m = tf.nn.relu(tf.matmul(concat_all_flatten, self.emb_M4k))

      new_state_tuple = (LSTMStateTuple(cell_output, m), attns, attn_states)
      new_send_state = (new_state_tuple, concat_hisTrack_flatten)

      return cell_output, new_send_state
Example #28
    def __init__(self,
                 num_units,
                 highway=False,
                 cell_clip=None,
                 initializer=None,
                 forget_bias=1.0,
                 activation=None,
                 reuse=None,
                 name=None,
                 use_layer_norm=False):
        """Initialize the parameters for an LSTM cell with simplified highway connections as described in
        'Deep Semantic Role Labeling: What works and what's next' (He et al. 2017).

        Args:
          num_units: int, The number of units in the LSTM cell.
          highway: (optional) Python boolean describing whether to include highway connections
          cell_clip: (optional) A float value, if provided the cell state is clipped
            by this value prior to the cell output activation.
          initializer: (optional) The initializer to use for the weight matrices.
            Uses an orthonormal initializer if none is provided.
          forget_bias: Biases of the forget gate are initialized by default to 1
            in order to reduce the scale of forgetting at the beginning of
            the training.
          activation: Activation function of the inner states.  Default: `tanh`.
          reuse: (optional) Python boolean describing whether to reuse variables
            in an existing scope.  If not `True`, and the existing scope already has
            the given variables, an error is raised.
          name: String, the name of the layer. Layers with the same name will
            share weights, but to avoid mistakes we require reuse=True in such
            cases.
          use_layer_norm: (optional) Python boolean describing whether to use layer normalization
        """
        super(HighwayLSTMCell, self).__init__(_reuse=reuse, name=name)
        # Inputs must be 2-dimensional.
        self.input_spec = base_layer.InputSpec(ndim=2)

        self._num_units = num_units

        self._highway = highway
        self._cell_clip = cell_clip
        self._initializer = initializer
        self._forget_bias = forget_bias
        self._activation = activation or math_ops.tanh

        self._state_size = (LSTMStateTuple(num_units, num_units))
        self._output_size = num_units

        # initialized in self.build
        self._input_kernel = None
        self._hidden_kernel = None
        self._bias = None
        self.use_layer_norm = use_layer_norm
Example #29
def get_init_state(args, name, q_type, shape):
    hinit_embed = make_var('hinit_ebd_' + name, shape)
    cinit_embed = make_var('cinit_ebd_' + name, shape)
    h_init = tf.expand_dims(tf.nn.embedding_lookup(hinit_embed, q_type),
                            axis=0)
    c_init = tf.expand_dims(tf.nn.embedding_lookup(cinit_embed, q_type),
                            axis=0)
    cell_init_state = {
        'lstm': lambda: LSTMStateTuple(c_init, h_init),
        'sru': lambda: h_init,
        'gru': lambda: h_init,
        'rnn': lambda: h_init
    }[args.cell.replace('bi-', '')]()
    return cell_init_state
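A hedged usage sketch (args, q_type_ids and the size names are assumed); the returned structure matches whatever the chosen cell expects as an initial state:

    init_state = get_init_state(args, name='q0', q_type=q_type_ids,
                                shape=[num_q_types, hidden_size])
    # For args.cell in {'lstm', 'bi-lstm'} this is an LSTMStateTuple(c, h);
    # for 'gru' / 'rnn' / 'sru' it is just the hidden tensor h.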
Example #30
    def call(self, inputs, state):
        """Long short-term memory cell (LSTM).

        Args:
          inputs: `2-D` tensor with shape `[batch_size x input_size]`.
          state: An `LSTMStateTuple` of state tensors, each shaped
            `[batch_size x self.state_size]`, if `state_is_tuple` has been set to
            `True`.  Otherwise, a `Tensor` shaped
            `[batch_size x 2 * self.state_size]`.

        Returns:
          A pair containing the new hidden state, and the new state (either a
            `LSTMStateTuple` or a concatenated state, depending on
            `state_is_tuple`).
        """
        sigmoid = tf.sigmoid
        self._step = self._step + 1

        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

        if self._linear is None:
            self._linear = _Linear([inputs, h], 4 * self._num_units, True)

        i, j, f, o = tf.split(value=self._linear([inputs, h]),
                              num_or_size_splits=4,
                              axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h_cnt = self._activation(new_c) * sigmoid(o)

        if self._step % self._skip_size == 0:
            w_h_skip, b_h_skip = self.weight_bias(
                [self._num_units, self._num_units], [self._num_units])
            new_h_skip = sigmoid(tf.matmul(h, w_h_skip) + b_h_skip)
            masked_w1, masked_w2 = self.masked_weight(_load=False)
            new_h = new_h_cnt * masked_w1 + new_h_skip * masked_w2

        else:
            new_h = new_h_cnt

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat([new_c, new_h], 1)
        return new_h, new_state
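Every _skip_size-th step the cell blends the regular LSTM output with a gated skip connection from the previous hidden state through the two masked weights:

$$h_t = \begin{cases} m_1 \odot h_t^{\mathrm{lstm}} + m_2 \odot \sigma(W_s h_{t-1} + b_s), & t \bmod k = 0 \\ h_t^{\mathrm{lstm}}, & \text{otherwise} \end{cases}$$

where $k$ is _skip_size, $h_t^{\mathrm{lstm}}$ is new_h_cnt, and $m_1, m_2$ come from masked_weight.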