Example #1
    def step(self, x, states):
        """
        One decoding step; the attention and GRU-style update equations are all here.
        :param x: the input at the current timestep
        :param states: the previous timestep's output ytm and hidden state stm
        :return: the output yt (or the attention weights) and the new states [yt, st]
        """
        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        # repeated n times along the timestep axis, giving (sample, step, dim)
        _stm = K.repeat(stm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        # softmax
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)
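        # (the three lines above are a hand-rolled softmax of et over the
        # timestep axis: at[:, j] = exp(et[:, j]) / sum_k exp(et[:, k]))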

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
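        # (context is the attention-weighted sum over x_seq: one feature
        # vector per batch element)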
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r) + K.dot(stm, self.U_r) +
            K.dot(context, self.C_r) + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z) + K.dot(stm, self.U_z) +
            K.dot(context, self.C_z) + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p) + K.dot((rt * stm), self.U_p) +
            K.dot(context, self.C_p) + self.b_p)

        # new hidden state:
        st = (1 - zt) * stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o) + K.dot(stm, self.U_o) +
            K.dot(context, self.C_o) + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]
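
For reference, the step above is Bahdanau-style additive attention followed by a GRU-style state update. Reading the code back as math (row-vector convention matching K.dot, \sigma the logistic sigmoid, \odot the elementwise product):

    e_{tj}      = \tanh(s_{t-1} W_a + h_j U_a + b_a) \, V_a
    \alpha_{tj} = \exp(e_{tj}) / \sum_k \exp(e_{tk})
    c_t         = \sum_j \alpha_{tj} h_j
    r_t         = \sigma(y_{t-1} W_r + s_{t-1} U_r + c_t C_r + b_r)
    z_t         = \sigma(y_{t-1} W_z + s_{t-1} U_z + c_t C_z + b_z)
    \tilde{s}_t = \tanh(y_{t-1} W_p + (r_t \odot s_{t-1}) U_p + c_t C_p + b_p)
    s_t         = (1 - z_t) \odot s_{t-1} + z_t \odot \tilde{s}_t
    y_t         = \mathrm{softmax}(y_{t-1} W_o + s_{t-1} U_o + c_t C_o + b_o)

Here the h_j are the encoder outputs stored in self.x_seq, and self._uxpb precomputes h_j U_a + b_a for all timesteps.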
Example #2
    def call(self, inputs, cond=None, memory=None):
        if memory is None:
            # no external memory supplied: start from an all-zero memory block
            mem = K.zeros(
                (K.shape(inputs)[0], self.mem_size, K.shape(inputs)[-1]))
        else:
            mem = K.variable(K.cast_to_floatx(memory))
        # prepend the memory to the inputs along the time axis
        inputs = K.concatenate([mem, inputs], axis=1)
        ret = super(GatedConv, self).call(inputs)
        if cond is not None:
            # conditioning input added through a bias-free linear projection
            # (note: this builds a fresh Dense layer on every call)
            d = Dense(2 * self.out_dims, use_bias=False, activation='linear')
            ret = ret + d(cond)
        # gated activation: split the channels in half and gate the first
        # half with the sigmoid of the second
        ret = self.nongate_activation(
            ret[:, :, :self.out_dims]) * activations.sigmoid(
                ret[:, :, self.out_dims:])
        if self.return_memory:
            ret = ret, inputs[:, :self.mem_size, :]
        return ret
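
The final activation above is a gated split: the convolution produces 2 * self.out_dims channels, the first half goes through self.nongate_activation and is multiplied by the sigmoid of the second half (a GLU-style gate). Below is a minimal NumPy sketch of just that gating step; the name glu_gate and the tanh default are our assumptions, not part of the layer:

    import numpy as np

    def glu_gate(ret, out_dims, nongate=np.tanh):
        # split the channel axis in half and gate one half with the
        # sigmoid of the other, as in the call() above
        a, b = ret[..., :out_dims], ret[..., out_dims:]
        return nongate(a) * (1.0 / (1.0 + np.exp(-b)))

    x = np.random.randn(2, 7, 8)   # (batch, steps, 2 * out_dims)
    y = glu_gate(x, out_dims=4)    # -> shape (2, 7, 4)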
Example #3
    def call(self, inputs, states, constants):  # inputs is the embedding; states holds the previous step's h

        # prev_output = states[0]
        # h = K.dot(inputs, self.kernel)
        # output = h + K.dot(prev_output, self.recurrent_kernel)

        # compute attention
        # H: (timesteps, latent_dim)
        # H * W_H
        # dimensions of h

        h_tm = states[0]
        self.x_seq = constants[0]

        self._uxpb = _time_distributed_dense(self.x_seq,
                                             self.U_a,
                                             b=self.b_a,
                                             input_dim=self.units,
                                             timesteps=self.encoder_ts,
                                             output_dim=self.units)
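        # (_uxpb precomputes U_a . h_j + b_a for every encoder timestep; the
        # per-step score below only adds the W_a . s_{t-1} term)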

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(h_tm, self.encoder_ts)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(
            activations.tanh(
                _Wxstm +
                self._uxpb),  # e_ij = a(s_{i-1}, h_j); self._uxpb precomputes U_a . h_j + b_a
            K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.encoder_ts)
        at /= at_sum_repeated  # Eq. (6): softmax over the timestep axis; shape (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1),
                            axis=1)  # Eq. (5): shape (batchsize, latent_dim)

        contextInput = K.concatenate([inputs, context], axis=1)
        # concatenate context and input along the feature axis

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(h_tm, self.W_z) + K.dot(contextInput, self.C_z) + self.b_z)

        rt = activations.sigmoid(
            K.dot(h_tm, self.W_r) + K.dot(contextInput, self.C_r) +
            self.b_r)  # analogous to the LSTM forget gate f_t; note there is no x_t input here

        t_ht = activations.tanh(
            K.dot((rt * h_tm), self.U_p) + K.dot(contextInput, self.C_p) +
            self.b_p)

        ht = (1 - zt) * h_tm + zt * t_ht

        return ht, [ht]


# # Let's use this cell in a RNN layer:
#
# cell = MinimalRNNCell(32)
# x = keras.Input((None, 5))
# layer = RNN(cell)
# y = layer(x)
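# # The attention cells in this file additionally take the encoder
# # sequence through RNN's `constants` argument, which Keras forwards to
# # the cell's call() (arriving as constants[0]). A sketch, assuming an
# # encoder feature tensor enc_seq:
#
# enc_seq = keras.Input((encoder_ts, units))
# y = RNN(cell)(x, constants=enc_seq)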
    def call(self, inputs, states, constants):
        '''
        call is invoked (and wrapped) by the enclosing RNN layer; at that
        point the constants argument is available.
        :param inputs: [wt; v_g], with dimension self.input_dim
        :param states: the previous step's ht and mt
        :param constants: cnn_encoder outputs
        :return:
        '''
        h_tm = states[0]  # last hidden state
        m_tm = states[1]  # last memory cell
        self.v_seq = constants[
            0]  # [self.cnn_encoder_k, self.units] self.units=cnn_encoder_d
        """
            f-gate
        """
        ft = activations.sigmoid(
            K.dot(h_tm, self.W_f) + K.dot(inputs, self.U_f) + self.b_f)
        """
            i-gate
        """
        it = activations.sigmoid(
            K.dot(h_tm, self.W_i) + K.dot(inputs, self.U_i) + self.b_i)
        """
            o-gate
        """
        ot = activations.sigmoid(
            K.dot(h_tm, self.W_o) + K.dot(inputs, self.U_o) + self.b_o)
        """
            g-gate (sentinel gate)
        """
        gt = activations.sigmoid(
            K.dot(h_tm, self.W_g) + K.dot(inputs, self.U_g) + self.b_g)
        """
            at-renew input
        """
        at = activations.tanh(
            K.dot(h_tm, self.W_a) + K.dot(inputs, self.U_a) + self.b_a)
        """
            mt-memory cell
        """
        mt = m_tm * ft + it * at
        """
            ht-hidden state
        """
        ht = ot * activations.tanh(mt)
        """
            st-visual sentinel
        """
        st = gt * activations.tanh(mt)
        """
            ct-visual context
        """
        st = K.expand_dims(st, axis=1)
        # fold st in and compute the attention weights jointly over [v_1..v_k, s_t],
        # shape [?, k+1, d] with d=self.units; slightly different from the paper
        self.v_expand = K.concatenate([self.v_seq, st], axis=1)
        # one_matrix = K.ones((self.cnn_encoder_k + 1, 1))
        vtt = K.dot(self.v_expand, self.W_z)
        dtt = K.repeat(K.dot(ht, self.U_z),
                       self.cnn_encoder_k + 1)  # (?, k + 1, k + 1)
        tantt = K.tanh(vtt + dtt)

        zt = K.dot(tantt, self.W_h)

        # softmax over the k + 1 positions (axis 1); the default axis=-1
        # would normalize over the singleton last axis
        alpha_t = activations.softmax(zt, axis=1)  # (?, k + 1, 1)
        # alpha_t = K.expand_dims(alpha_t)  # (?, k + 1, 1)
        # weighted sum taken directly over st, v1, ..., vk, which differs
        # slightly from the paper; v_expand is (?, k + 1, units)
        # output: (?, units)
        ct = K.squeeze(K.batch_dot(alpha_t, self.v_expand, axes=1),
                       axis=1)  # batch_dot contracts over the k + 1 axis
        ht_plus_ct = ht + ct

        return ht_plus_ct, [ht, mt]
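
The second call above is an adaptive-attention ("visual sentinel") LSTM step. Reading the code back as math, with V the k encoder vectors stacked row-wise and [V; s_t] the sentinel appended as an extra row (self.v_expand):

    m_t      = f_t \odot m_{t-1} + i_t \odot a_t
    h_t      = o_t \odot \tanh(m_t)
    s_t      = g_t \odot \tanh(m_t)
    z_t      = \tanh([V; s_t] W_z + \mathbf{1} (h_t U_z)) W_h
    \alpha_t = \mathrm{softmax}(z_t)            (over the k + 1 rows)
    c_t      = \alpha_t^\top [V; s_t]
    out_t    = h_t + c_t

where each gate x_t in {f_t, i_t, o_t, g_t} is \sigma(h_{t-1} W_x + input U_x + b_x). Note that the returned state pair is [h_t, m_t], while h_t + c_t is emitted as the step output.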