Example #1
    def rnn_step_backward(self, dnext_h, cache):
        """
        Backward pass for a single timestep of a vanilla RNN.

        Inputs:
        - dnext_h: Gradient of loss with respect to next hidden state, of shape (N, H)
        - cache: Cache object from the forward pass

        Returns a tuple of:
        - dx: Gradients of input data, of shape (N, D)
        - dprev_h: Gradients of previous hidden state, of shape (N, H)
        - dWx: Gradients of input-to-hidden weights, of shape (D, H)
        - dWh: Gradients of hidden-to-hidden weights, of shape (H, H)
        - db: Gradients of bias vector, of shape (H,)
        """
        dx, dprev_h, dWx, dWh, db = None, None, None, None, None
        ##############################################################################
        # TODO: Implement the backward pass for a single step of a vanilla RNN.      #
        #                                                                            #
        # HINT: For the tanh function, you can compute the local derivative in terms #
        # of the output value from tanh.                                             #
        ##############################################################################
        x, prev_h, Wx, Wh, dtanh = cache
        dz = dnext_h * dtanh
        dx = Tools.matmul(dz, Wx.T)
        dprev_h = Tools.matmul(dz, Wh.T)
        dWx = Tools.matmul(x.T, dz)
        dWh = Tools.matmul(prev_h.T, dz)
        db = np.sum(dz, axis=0)

        ##############################################################################
        #                               END OF YOUR CODE                             #
        ##############################################################################
        return dx, dprev_h, dWx, dWh, db
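The saved `dtanh` in the cache is the local derivative 1 - next_h**2 computed during the forward step (Example #8), so the backward pass never recomputes tanh. Below is a minimal, self-contained numeric check of the `dWx` formula, assuming `Tools.matmul` behaves like `np.matmul`; the plain functions here are illustrative stand-ins, not the repo's methods.

import numpy as np

def rnn_step(x, prev_h, Wx, Wh, b):
    # forward pass as in Example #8: next_h = tanh(x Wx + prev_h Wh + b)
    return np.tanh(x @ Wx + prev_h @ Wh + b)

N, D, H = 3, 4, 5
rng = np.random.default_rng(0)
x, prev_h = rng.standard_normal((N, D)), rng.standard_normal((N, H))
Wx, Wh, b = rng.standard_normal((D, H)), rng.standard_normal((H, H)), rng.standard_normal(H)
dnext_h = rng.standard_normal((N, H))

# analytic gradient for Wx, mirroring rnn_step_backward
next_h = rnn_step(x, prev_h, Wx, Wh, b)
dz = dnext_h * (1.0 - next_h ** 2)
dWx = x.T @ dz

# numeric gradient for one entry of Wx
eps = 1e-6
Wx_p, Wx_m = Wx.copy(), Wx.copy()
Wx_p[0, 0] += eps
Wx_m[0, 0] -= eps
num = (np.sum(rnn_step(x, prev_h, Wx_p, Wh, b) * dnext_h)
       - np.sum(rnn_step(x, prev_h, Wx_m, Wh, b) * dnext_h)) / (2 * eps)
print(abs(num - dWx[0, 0]))  # should be tiny, around 1e-9 or smaller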
Example #2
    def lstm_step_backward(self, dnext_h, dnext_c, cache):
        """
        Backward pass for a single timestep of an LSTM.

        Inputs:
        - dnext_h: Gradients of next hidden state, of shape (N, H)
        - dnext_c: Gradients of next cell state, of shape (N, H)
        - cache: Values from the forward pass

        Returns a tuple of:
        - dx: Gradient of input data, of shape (N, D)
        - dprev_h: Gradient of previous hidden state, of shape (N, H)
        - dprev_c: Gradient of previous cell state, of shape (N, H)
        - dWx: Gradient of input-to-hidden weights, of shape (D, 4H)
        - dWh: Gradient of hidden-to-hidden weights, of shape (H, 4H)
        - db: Gradient of biases, of shape (4H,)
        """
        dx, dprev_h, dprev_c, dWx, dWh, db = None, None, None, None, None, None
        #############################################################################
        # TODO: Implement the backward pass for a single timestep of an LSTM.       #
        #                                                                           #
        # HINT: For sigmoid and tanh you can compute local derivatives in terms of  #
        # the output value from the nonlinearity.                                   #
        #############################################################################
        x, prev_h, prev_c, Wx, Wh, i, f, o, g, next_c = cache

        dnext_c = dnext_c + o * (
            1 - np.tanh(next_c)**2) * dnext_h  # next_h = o*np.tanh(next_c)
        di = dnext_c * g  # next_c = f*prev_c + i*g
        df = dnext_c * prev_c  # next_c = f*prev_c + i*g
        do = dnext_h * np.tanh(next_c)  # next_h = o*np.tanh(next_c)
        dg = dnext_c * i  # next_c = f*prev_c + i*g
        dprev_c = f * dnext_c  # next_c = f*prev_c + i*g
        dz = np.hstack((i * (1 - i) * di, f * (1 - f) * df, o * (1 - o) * do,
                        (1 - g**2) * dg))  # the four gate gradients, in [i, f, o, g] order

        dx = Tools.matmul(dz, Wx.T)
        dprev_h = Tools.matmul(dz, Wh.T)
        dWx = Tools.matmul(x.T, dz)
        dWh = Tools.matmul(prev_h.T, dz)

        db = np.sum(dz, axis=0)

        ##############################################################################
        #                               END OF YOUR CODE                             #
        ##############################################################################

        return dx, dprev_h, dprev_c, dWx, dWh, db
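The stacked `dz` must use the same gate order ([i, f, o, g] along the 4H axis) as `lstm_step_forward` in Example #6, and the incoming `dnext_c` is first augmented with the gradient flowing through next_h = o * tanh(next_c). Below is a minimal numeric check of the `dprev_c` path, written with plain NumPy stand-ins for the class methods; all helper names are illustrative.

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def lstm_step(x, prev_h, prev_c, Wx, Wh, b):
    # forward as in Example #6; gate order along the 4H axis is [i, f, o, g]
    H = prev_h.shape[1]
    z = x @ Wx + prev_h @ Wh + b
    i, f, o = sigmoid(z[:, :H]), sigmoid(z[:, H:2*H]), sigmoid(z[:, 2*H:3*H])
    g = np.tanh(z[:, 3*H:])
    next_c = f * prev_c + i * g
    next_h = o * np.tanh(next_c)
    return next_h, next_c, (i, f, o, g)

N, D, H = 2, 3, 4
rng = np.random.default_rng(1)
x, prev_h, prev_c = (rng.standard_normal((N, D)), rng.standard_normal((N, H)),
                     rng.standard_normal((N, H)))
Wx, Wh, b = (rng.standard_normal((D, 4*H)), rng.standard_normal((H, 4*H)),
             rng.standard_normal(4*H))
dnext_h = rng.standard_normal((N, H))

# analytic dprev_c with dnext_c = 0, mirroring lstm_step_backward
next_h, next_c, (i, f, o, g) = lstm_step(x, prev_h, prev_c, Wx, Wh, b)
dc = o * (1 - np.tanh(next_c) ** 2) * dnext_h
dprev_c = f * dc

# numeric check on one entry of prev_c
eps = 1e-6
cp, cm = prev_c.copy(), prev_c.copy()
cp[0, 0] += eps
cm[0, 0] -= eps
num = (np.sum(lstm_step(x, prev_h, cp, Wx, Wh, b)[0] * dnext_h)
       - np.sum(lstm_step(x, prev_h, cm, Wx, Wh, b)[0] * dnext_h)) / (2 * eps)
print(abs(num - dprev_c[0, 0]))  # should be tiny, around 1e-9 or smaller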
Example #3
    def bpDelta(self):

        deltaPrevReshaped = Tools.matmul(self.deltaOri, self.w.T)

        self.deltaPrev = deltaPrevReshaped if self.needReshape is False else deltaPrevReshaped.reshape(
            self.shapeOfOriIn)
        return self.deltaPrev
Example #4
 def bpWeights(self, input, lrt):
     dw = Tools.matmul(input.T, self.deltaOri)
     db = np.sum(self.deltaOri, axis=0, keepdims=True).reshape(self.b.shape)
     weight = (self.w, self.b)
     dweight = (dw, db)
     # the tuples are passed by reference, so the optimizer can update the weights in place
     self.optimizerObj.getUpdWeights(weight, dweight, lrt)
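Examples #3 and #4, together with the `fp` methods in Examples #7 and #12, make up a fully connected layer; the `optimizerObj.getUpdWeights` call is not shown in this listing. Below is a minimal sketch of the same forward/backward data flow with an identity activation and a plain SGD step standing in for the optimizer object; the class and variable names are illustrative only.

import numpy as np

class DenseSketch:
    """Minimal fully-connected layer mirroring fp / bpDelta / bpWeights,
    with a plain SGD step standing in for optimizerObj.getUpdWeights."""
    def __init__(self, d_in, d_out, rng):
        self.w = rng.standard_normal((d_in, d_out)) * 0.01
        self.b = np.zeros((1, d_out))

    def fp(self, x):
        self.x = x
        self.out = x @ self.w + self.b          # identity activation for brevity
        return self.out

    def bp(self, delta, lrt):
        # bpDelta: gradient with respect to the layer input
        delta_prev = delta @ self.w.T
        # bpWeights: parameter gradients and a simple SGD update
        dw = self.x.T @ delta
        db = np.sum(delta, axis=0, keepdims=True)
        self.w -= lrt * dw
        self.b -= lrt * db
        return delta_prev

rng = np.random.default_rng(2)
layer = DenseSketch(5, 3, rng)
x = rng.standard_normal((8, 5))
delta = layer.fp(x) - rng.standard_normal((8, 3))   # e.g. gradient of an MSE-style loss
print(layer.bp(delta, lrt=0.1).shape)               # (8, 5), ready for the layer below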
Example #5
    def gru_step_forward(self, x, prev_h, Wzx, Wzh, bz, Wax, War, ba):
        """
        Forward pass for a single timestep of an LSTM.

        The input data has dimension D, the hidden state has dimension H, and we use
        a minibatch size of N.

        Note that a sigmoid() function has already been provided for you in this file.

        Inputs:
        - x: Input data, of shape (N, D)
        - prev_h: Previous hidden state, of shape (N, H)
        - prev_c: previous cell state, of shape (N, H)
        - Wzx: Input-to-hidden weights, of shape (D, 4H)
        - Wh: Hidden-to-hidden weights, of shape (H, 4H)
        - b: Biases, of shape (4H,)

        Returns a tuple of:
        - next_h: Next hidden state, of shape (N, H)
        - next_c: Next cell state, of shape (N, H)
        - cache: Tuple of values needed for backward pass.
        """
        next_h, cache = None, None
        #############################################################################
        # TODO: Implement the forward pass for a single timestep of a GRU.          #
        # You may want to use the numerically stable sigmoid implementation above.
        # First layer: x has shape (N,T,D); upper layers see xh of shape (N,T,H).
        # First layer: Wx has shape (D,H); upper layers see Wxh of shape (H,H).
        #############################################################################
        H = prev_h.shape[1]
        # z_hat, of shape (N, 2H): reset and update gate pre-activations
        z_hat = Tools.matmul(x, Wzx) + Tools.matmul(prev_h, Wzh) + bz

        # of shape(N,H)
        r = Tools.sigmoid(z_hat[:, :H])
        z = Tools.sigmoid(z_hat[:, H:2 * H])

        a = Tools.matmul(x, Wax) + Tools.matmul(r * prev_h, War) + ba

        next_h = prev_h * (1. - z) + z * np.tanh(a)

        cache = (x, prev_h, Wzx, Wzh, Wax, War, z_hat, r, z, a)
        ##############################################################################
        #                               END OF YOUR CODE                             #
        ##############################################################################

        return next_h, cache
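Note the weight shapes implied by the code: `z_hat` holds only the reset and update gates, so `Wzx` is (D, 2H) and `Wzh` is (H, 2H), while `Wax` is (D, H) and `War` is (H, H). A small shape sketch with plain NumPy stand-ins for `Tools.sigmoid` and `Tools.matmul`; the function here is illustrative, not the repo's method.

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def gru_step(x, prev_h, Wzx, Wzh, bz, Wax, War, ba):
    # same computation as gru_step_forward, written with plain NumPy
    H = prev_h.shape[1]
    z_hat = x @ Wzx + prev_h @ Wzh + bz        # (N, 2H): reset and update gates
    r, z = sigmoid(z_hat[:, :H]), sigmoid(z_hat[:, H:])
    a = x @ Wax + (r * prev_h) @ War + ba      # candidate pre-activation
    return prev_h * (1.0 - z) + z * np.tanh(a)

N, D, H = 2, 3, 4
rng = np.random.default_rng(3)
x, prev_h = rng.standard_normal((N, D)), rng.standard_normal((N, H))
Wzx, Wzh, bz = rng.standard_normal((D, 2*H)), rng.standard_normal((H, 2*H)), np.zeros(2*H)
Wax, War, ba = rng.standard_normal((D, H)), rng.standard_normal((H, H)), np.zeros(H)
print(gru_step(x, prev_h, Wzx, Wzh, bz, Wax, War, ba).shape)  # (N, H)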
Example #6
    def lstm_step_forward(self, x, prev_h, prev_c, Wx, Wh, b):
        """
        Forward pass for a single timestep of an LSTM.

        The input data has dimension D, the hidden state has dimension H, and we use
        a minibatch size of N.

        Note that a sigmoid() function has already been provided for you in this file.

        Inputs:
        - x: Input data, of shape (N, D)
        - prev_h: Previous hidden state, of shape (N, H)
        - prev_c: previous cell state, of shape (N, H)
        - Wx: Input-to-hidden weights, of shape (D, 4H)
        - Wh: Hidden-to-hidden weights, of shape (H, 4H)
        - b: Biases, of shape (4H,)

        Returns a tuple of:
        - next_h: Next hidden state, of shape (N, H)
        - next_c: Next cell state, of shape (N, H)
        - cache: Tuple of values needed for backward pass.
        """
        next_h, next_c, cache = None, None, None
        #############################################################################
        # TODO: Implement the forward pass for a single timestep of an LSTM.        #
        # You may want to use the numerically stable sigmoid implementation above.
        # First layer: x has shape (N,T,D); upper layers see xh of shape (N,T,H).
        # First layer: Wx has shape (D,H); upper layers see Wxh of shape (H,H).
        #############################################################################
        H = prev_h.shape[1]
        # z, of shape (N, 4H)
        z = Tools.matmul(x, Wx) + Tools.matmul(prev_h, Wh) + b

        # of shape(N,H)
        i = Tools.sigmoid(z[:, :H])
        f = Tools.sigmoid(z[:, H:2 * H])
        o = Tools.sigmoid(z[:, 2 * H:3 * H])
        g = np.tanh(z[:, 3 * H:])
        next_c = f * prev_c + i * g
        next_h = o * np.tanh(next_c)

        cache = (x, prev_h, prev_c, Wx, Wh, i, f, o, g, next_c)
        ##############################################################################
        #                               END OF YOUR CODE                             #
        ##############################################################################

        return next_h, next_c, cache
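A single step like this is typically unrolled over a sequence. The sketch below shows how that unrolling might look, assuming `rnn` is an instance of the class that defines `lstm_step_forward`; it is an illustration under that assumption, not the repo's actual sequence-level forward.

import numpy as np

def lstm_forward_sketch(rnn, x_seq, h0, Wx, Wh, b):
    """Unroll lstm_step_forward over a sequence; x_seq has shape (N, T, D).
    `rnn` is assumed to be an instance of the class shown in these examples."""
    N, T, D = x_seq.shape
    H = h0.shape[1]
    h = np.zeros((N, T, H))
    prev_h, prev_c = h0, np.zeros_like(h0)
    caches = []
    for t in range(T):
        prev_h, prev_c, cache = rnn.lstm_step_forward(x_seq[:, t, :], prev_h, prev_c, Wx, Wh, b)
        h[:, t, :] = prev_h
        caches.append(cache)
    return h, caches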
Example #7
 def fp(self, input):
     # flatten the input if it needs reshaping
     self.shapeOfOriIn = input.shape
     self.inputReshaped = input if self.needReshape is False else input.reshape(
         input.shape[0], -1)
     self.out = self.activator.activate(
         Tools.matmul(self.inputReshaped, self.w) + self.b)
     ####debug####
     # np.savetxt('G:/0tmp/0debug/x.csv',self.inputReshaped[0])
     # np.savetxt('G:/0tmp/0debug/w_c1.csv', self.w[:,0])
     # np.savetxt('G:/0tmp/0debug/w_c2.csv', self.w[:, 1])
     # np.savetxt('G:/0tmp/0debug/out.csv', self.out[0])
     ####debug end#####
     return self.out
Example #8
    def rnn_step_forward(self, x, prev_h, Wx, Wh, b):
        """
        Run the forward pass for a single timestep of a vanilla RNN that uses a tanh
        activation function.

        The input data has dimension D, the hidden state has dimension H, and we use
        a minibatch size of N.

        Inputs:
        - x: Input data for this timestep, of shape (N, D).
        - prev_h: Hidden state from previous timestep, of shape (N, H)
        - Wx: Weight matrix for input-to-hidden connections, of shape (D, H)
        - Wh: Weight matrix for hidden-to-hidden connections, of shape (H, H)
        - b: Biases of shape (H,)

        Returns a tuple of:
        - next_h: Next hidden state, of shape (N, H)
        - cache: Tuple of values needed for the backward pass.
        """

        next_h, cache = None, None
        ##############################################################################
        # TODO: Implement a single forward step for the vanilla RNN. Store the next  #
        # hidden state and any values you need for the backward pass in the next_h   #
        # and cache variables respectively.                                          #
        ##############################################################################
        z = Tools.matmul(x, Wx) + Tools.matmul(prev_h, Wh) + b

        next_h = np.tanh(z)

        dtanh = 1. - next_h * next_h
        cache = (x, prev_h, Wx, Wh, dtanh)
        ##############################################################################
        #                               END OF YOUR CODE                             #
        ##############################################################################
        return next_h, cache
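As the HINT in Example #1 notes, the derivative of tanh can be expressed through its output, which is why the cache stores `dtanh = 1 - next_h**2` instead of the pre-activation. A one-line sanity check of that identity:

import numpy as np

z = np.linspace(-3, 3, 7)
next_h = np.tanh(z)
# d tanh(z)/dz written in terms of the output value equals sech^2(z)
assert np.allclose(1.0 - next_h ** 2, 1.0 / np.cosh(z) ** 2)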
Example #9
    def conv_efficient(self, x, w, b, output_size, vec_idx_key, strides=1):
        batches = x.shape[0]
        depth_i = x.shape[1]
        filter_size = w.shape[2]
        depth_o = w.shape[0]

        if 4 == x.ndim:  # raw (unvectorized) input
            input_size = x.shape[2]
            p = int(((output_size - 1) * strides + filter_size - input_size) /
                    2)  # padding size
            # logger.debug("padding begin..")
            if p > 0:  # padding needed
                x_pad = Tools.padding(x, p, self.dataType)
            else:
                x_pad = x
            st = time.time()
            logger.debug("vecting begin..")
            # Pick whichever of the three optimized variants below is fastest on your hardware
            x_col = self.vectorize4conv_batches(x_pad, filter_size,
                                                output_size, strides)
            #x_col = spd.vectorize4conv_batches(x_pad, filter_size, output_size, strides)
            #x_col = vec_by_idx(x_pad, filter_size, filter_size,vec_idx_key,0, strides)

            logger.debug("vecting end.. %f s" % (time.time() - st))
        else:  # input is already in x_col (vectorized) format
            x_col = x

        w_row = w.reshape(depth_o, x_col.shape[1])
        conv = np.zeros((batches, depth_o, (output_size * output_size)),
                        dtype=self.dataType)
        st1 = time.time()
        logger.debug("matmul begin..")
        # loop over the batch instead of broadcasting, for efficiency
        for batch in range(batches):
            conv[batch] = Tools.matmul(w_row, x_col[batch]) + b

        logger.debug("matmul end.. %f s" % (time.time() - st1))
        conv_return = conv.reshape(batches, depth_o, output_size, output_size)

        return conv_return
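`vectorize4conv_batches` (not shown in this listing) produces the im2col layout that lets the convolution collapse into the `w_row` matrix product above. The sketch below illustrates that idea for a single already-padded image; the repo's routine may order patches differently, and the filter and bias shapes here are assumptions for illustration.

import numpy as np

def im2col_single(x, filter_size, output_size, stride=1):
    """Sketch of the vectorization step for ONE padded image x of shape (C, H, W):
    each output location becomes a column of length C * filter_size * filter_size."""
    C = x.shape[0]
    cols = np.zeros((C * filter_size * filter_size, output_size * output_size))
    idx = 0
    for i in range(output_size):
        for j in range(output_size):
            patch = x[:, i*stride:i*stride+filter_size, j*stride:j*stride+filter_size]
            cols[:, idx] = patch.reshape(-1)
            idx += 1
    return cols

# one 3-channel 5x5 image, four 3x3 filters, output of 3x3 (no padding needed)
rng = np.random.default_rng(4)
x = rng.standard_normal((3, 5, 5))
w = rng.standard_normal((4, 3, 3, 3))          # (depth_o, depth_i, k, k)
b = np.zeros((4, 1))
x_col = im2col_single(x, filter_size=3, output_size=3)
w_row = w.reshape(4, -1)                       # analogous to w_row in conv_efficient
out = (w_row @ x_col + b).reshape(4, 3, 3)     # conv output: (depth_o, out, out)
print(out.shape)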
Example #10
    def conv4dw(self, x, w, output_size, b=0, strides=1, x_v=False):
        batches = x.shape[0]
        depth_i = x.shape[1]
        filter_size = w.shape[2]  # filter size; corresponds to the conv layer's error-map size
        x_per_filter = filter_size * filter_size
        depth_o = w.shape[1]

        if False == x_v:  # raw (unvectorized) input
            input_size = x.shape[2]
            p = int(((output_size - 1) * strides + filter_size - input_size) /
                    2)  # padding size
            if p > 0:  # padding needed
                x_pad = Tools.padding(x, p, self.dataType)
            else:
                x_pad = x
            logger.debug("vec4dw begin..")
            x_col = self.vectorize4convdw_batches(x_pad, filter_size,
                                                  output_size, strides)
            logger.debug("vec4dw end..")
        else:  # input is already in x_col (vectorized) format
            x_col = x

        w_row = w.reshape(batches, depth_o, x_per_filter)
        conv = np.zeros(
            (batches, depth_i, depth_o, (output_size * output_size)),
            dtype=self.dataType)
        logger.debug("conv4dw matmul begin..")
        for batch in range(batches):
            for col in range(depth_i):
                conv[batch, col] = Tools.matmul(w_row[batch], x_col[batch,
                                                                    col])

        conv_sum = np.sum(conv, axis=0)
        # transpose before reshaping to avoid misaligning the filter/channel axes
        conv = conv_sum.transpose(1, 0, 2).reshape(depth_o, depth_i,
                                                   output_size, output_size)

        logger.debug("conv4dw matmul end..")
        return conv, x_col
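The transpose-before-reshape comment matters because `conv_sum` is laid out as (depth_i, depth_o, output_size*output_size) while the weight gradient must come out as (depth_o, depth_i, k, k); reshaping directly would silently scramble the filter and channel axes. A tiny demonstration with dummy values:

import numpy as np

depth_i, depth_o, out = 2, 3, 2
conv_sum = np.arange(depth_i * depth_o * out * out).reshape(depth_i, depth_o, out * out)

good = conv_sum.transpose(1, 0, 2).reshape(depth_o, depth_i, out, out)
bad = conv_sum.reshape(depth_o, depth_i, out, out)   # same element count, wrong layout

print(np.array_equal(good, bad))  # False: the filter/channel slices no longer line up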
Example #11
 def bpDelta(self):
     self.deltaPrev = Tools.matmul(self.deltaOri, self.w.T)
     return self.deltaPrev
Example #12
 def fp(self, input):
     self.out = self.activator.activate(
         Tools.matmul(input, self.w) + self.b)
     return self.out
Example #13
    def gru_step_backward(self, dnext_h, cache):
        """
        Backward pass for a single timestep of an LSTM.

        Inputs:
        - dnext_h: Gradients of next hidden state, of shape (N, H)
        - dnext_c: Gradients of next cell state, of shape (N, H)
        - cache: Values from the forward pass

        Returns a tuple of:
        - dx: Gradient of input data, of shape (N, D)
        - dprev_h: Gradient of previous hidden state, of shape (N, H)
        - dprev_c: Gradient of previous cell state, of shape (N, H)
        - dWx: Gradient of input-to-hidden weights, of shape (D, 4H)
        - dWh: Gradient of hidden-to-hidden weights, of shape (H, 4H)
        - db: Gradient of biases, of shape (4H,)
        """
        dx, dprev_h, dWzx, dWzh, dbz, dWax, dWar, dba = None, None, None, None, None, None, None, None
        #############################################################################
        # TODO: Implement the backward pass for a single timestep of a GRU.         #
        #                                                                           #
        # HINT: For sigmoid and tanh you can compute local derivatives in terms of  #
        # the output value from the nonlinearity.                                   #
        #############################################################################
        x, prev_h, Wzx, Wzh, Wax, War, z_hat, r, z, a = cache

        N, D = x.shape
        H = dnext_h.shape[1]

        z_hat_H1 = z_hat[:, :H]
        z_hat_H2 = z_hat[:, H:2 * H]
        # delta
        tanha = np.tanh(a)
        dh = dnext_h
        da = dh * z * (1. - tanha * tanha)
        dh_prev_1 = dh * (1. - z)
        # dz = dh * (z+tanha)
        # dz = dh*tanha+1.-dh*(1.-z)*prev_h
        # dz = dh*tanha+1.-dh*prev_h
        dz = dh * (tanha - prev_h)
        dz_hat_2 = dz * (z * (1. - z))
        # dz_hat_2 = dz*(z_hat_H2*(1.-z_hat_H2))

        dhat_a = Tools.matmul(da, War.T)
        # dz_hat_2 = dhat_r * r
        dr = dhat_a * prev_h

        dx_1 = Tools.matmul(da, Wax.T)
        dh_prev_2 = dhat_a * r  #da* Tools.matmul(r,War.T)
        # dz_hat_1 = dh_prev_2 * (r * (1. - r))
        dz_hat_1 = dr * (r * (1. - r))

        dz_hat = np.hstack((dz_hat_1, dz_hat_2))

        # dh_prev_3 = Tools.matmul(dz_hat_2,Wzh.T)
        # dx_2 = Tools.matmul(dz_hat_2,Wzx.T)
        # dh_prev_3 = Tools.matmul(dz_hat,Wzh.T)
        # dh_prev_3 = Tools.matmul(dz_hat_2,Wzh.T)
        dx_2 = Tools.matmul(dz_hat, Wzx.T)

        # dx_3 = Tools.matmul(dz_hat_1,Wzx.T)
        # dh_prev_4 =Tools.matmul(dz_hat_1, Wzh.T)
        # dx_3 = Tools.matmul(dz_hat,Wzx.T)
        # dh_prev_4 =Tools.matmul(dz_hat, Wzh.T)

        # dh_prev_34 = np.hstack((dh_prev_3, dh_prev_4))
        # dh_prev_34 = Tools.matmul(dh_prev_34,Wzh.T)
        dh_prev_34 = Tools.matmul(dz_hat, Wzh.T)
        # dprev_h = dh_prev_1+dh_prev_2+dh_prev_34 * 2. #dh_prev_3 + dh_prev_4
        # dx = dx_1 + dx_2*2. # +dx_3
        dprev_h = dh_prev_1 + dh_prev_2 + dh_prev_34  #dh_prev_3 + dh_prev_4
        dx = dx_1 + dx_2  # +dx_3

        dWax = Tools.matmul(x.T, da)
        dWar = Tools.matmul((r * prev_h).T, da)
        dba = np.sum(da, axis=0)

        dWzx = Tools.matmul(x.T, dz_hat)
        dWzh = Tools.matmul(prev_h.T, dz_hat)
        dbz = np.sum(dz_hat, axis=0)
        ##############################################################################
        #                               END OF YOUR CODE                             #
        ##############################################################################

        return dx, dprev_h, dWzx, dWzh, dbz, dWax, dWar, dba
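The commented-out alternatives above hint at how easy it is to mishandle the reset-gate path; a numeric gradient check against the forward pass of Example #5 is the quickest way to confirm the final formulas. A minimal sketch of such a check for `dprev_h`, using plain NumPy stand-ins for `Tools`; all helper names are illustrative.

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def gru_step(x, prev_h, Wzx, Wzh, bz, Wax, War, ba):
    # forward as in Example #5
    H = prev_h.shape[1]
    z_hat = x @ Wzx + prev_h @ Wzh + bz
    r, z = sigmoid(z_hat[:, :H]), sigmoid(z_hat[:, H:])
    a = x @ Wax + (r * prev_h) @ War + ba
    return prev_h * (1.0 - z) + z * np.tanh(a), (r, z, a)

N, D, H = 2, 3, 4
rng = np.random.default_rng(5)
x, prev_h = rng.standard_normal((N, D)), rng.standard_normal((N, H))
Wzx, Wzh, bz = rng.standard_normal((D, 2*H)), rng.standard_normal((H, 2*H)), np.zeros(2*H)
Wax, War, ba = rng.standard_normal((D, H)), rng.standard_normal((H, H)), np.zeros(H)
dnext_h = rng.standard_normal((N, H))

# analytic dprev_h, following the same formulas as gru_step_backward
_, (r, z, a) = gru_step(x, prev_h, Wzx, Wzh, bz, Wax, War, ba)
tanha = np.tanh(a)
da = dnext_h * z * (1 - tanha ** 2)
dz_hat = np.hstack(((da @ War.T) * prev_h * r * (1 - r),      # reset-gate branch
                    dnext_h * (tanha - prev_h) * z * (1 - z)))  # update-gate branch
dprev_h = dnext_h * (1 - z) + (da @ War.T) * r + dz_hat @ Wzh.T

# numeric check on one entry of prev_h
eps = 1e-6
hp, hm = prev_h.copy(), prev_h.copy()
hp[0, 0] += eps
hm[0, 0] -= eps
num = (np.sum(gru_step(x, hp, Wzx, Wzh, bz, Wax, War, ba)[0] * dnext_h)
       - np.sum(gru_step(x, hm, Wzx, Wzh, bz, Wax, War, ba)[0] * dnext_h)) / (2 * eps)
print(abs(num - dprev_h[0, 0]))  # should be tiny, around 1e-9 or smaller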