Example #1
def forward(self, Y_train, Y_hat):
    # Binary cross-entropy loss, averaged over the batch of size m.
    target = Y_train.reshape(Y_hat.shape)
    m = target.shape[0]
    # Clip predictions away from 0 and 1 so the logs stay finite.
    p.clip(Y_hat, self.epsilon, 1.0 - self.epsilon, out=Y_hat)
    cost = -target * p.log(Y_hat) - (1 - target) * p.log(1 - Y_hat)
    J = p.sum(cost, axis=0, keepdims=True) / m
    return p.squeeze(J)
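
A minimal standalone sketch of the same computation, assuming `p` is a NumPy-compatible array module (plain NumPy here) and `self.epsilon` is a small constant such as 1e-12:

import numpy as np

epsilon = 1e-12                                    # assumed value for self.epsilon
Y_train = np.array([1.0, 0.0, 1.0])                # labels, reshaped to match Y_hat
Y_hat = np.array([[0.9], [0.2], [0.8]])            # predicted probabilities
target = Y_train.reshape(Y_hat.shape)
np.clip(Y_hat, epsilon, 1.0 - epsilon, out=Y_hat)
cost = -target * np.log(Y_hat) - (1 - target) * np.log(1 - Y_hat)
J = np.squeeze(np.sum(cost, axis=0, keepdims=True) / target.shape[0])
print(J)  # ~0.184, the mean negative log-likelihood of the three samples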
Example #2
def lstm_step_backward(self, da_next, dc_next, cache):
    # Backward pass for a single LSTM step.
    # Cached values: the gate activations (i, f, o, g), z_t = tanh(c_next),
    # plus the previous cell/hidden states and the step input.
    z_i, z_f, z_o, z_g, z_t, c_prev, a_prev, x = cache

    # Gradients through a_next = z_o * z_t and the cell-state update.
    dz_o = z_t * da_next
    dc_t = z_o * (1 - z_t * z_t) * da_next + dc_next
    dz_f = c_prev * dc_t
    dz_i = z_g * dc_t
    dc_prev = z_f * dc_t
    dz_g = z_i * dc_t

    # Back through the sigmoid (i, f, o) and tanh (g) non-linearities.
    da_i = (1 - z_i) * z_i * dz_i
    da_f = (1 - z_f) * z_f * dz_f
    da_o = (1 - z_o) * z_o * dz_o
    da_g = (1 - z_g * z_g) * dz_g
    da = p.hstack((da_i, da_f, da_o, da_g))

    # Parameter and input gradients.
    dWx = x.T.dot(da)
    dWa = a_prev.T.dot(da)
    db = p.sum(da, axis=0)
    dx = da.dot(self.parameters['Wx'].T)
    da_prev = da.dot(self.parameters['Wa'].T)

    return dx, da_prev, dc_prev, dWx, dWa, db
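
For context, a hedged sketch of the forward step that would produce this cache layout, assuming gate order (i, f, o, g) along the last axis, Wx of shape (D, 4H) and Wa of shape (H, 4H); the names mirror the snippet above, the library's actual forward method may differ:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step_forward(x, a_prev, c_prev, Wx, Wa, b):
    H = a_prev.shape[1]
    a = x @ Wx + a_prev @ Wa + b        # pre-activations, shape (N, 4H)
    z_i = sigmoid(a[:, :H])             # input gate
    z_f = sigmoid(a[:, H:2 * H])        # forget gate
    z_o = sigmoid(a[:, 2 * H:3 * H])    # output gate
    z_g = np.tanh(a[:, 3 * H:])         # candidate cell state
    c_next = z_f * c_prev + z_i * z_g
    z_t = np.tanh(c_next)
    a_next = z_o * z_t
    cache = (z_i, z_f, z_o, z_g, z_t, c_prev, a_prev, x)
    return a_next, c_next, cache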
Example #3
def rnn_step_backward(self, da_next, cache):
    # Backward pass for a single tanh RNN step.
    x, a_prev, a_next = cache
    da = da_next * (1 - a_next * a_next)  # back through the tanh
    dx = da.dot(self.parameters['Wxa'].T)
    da_prev = da.dot(self.parameters['Waa'].T)
    dWx = x.T.dot(da)
    dWh = a_prev.T.dot(da)
    db = p.sum(da, axis=0)
    return dx, da_prev, dWx, dWh, db
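
A quick finite-difference check of the dx formula above, assuming the forward step is a_next = tanh(x.dot(Wxa) + a_prev.dot(Waa) + b), which is what the (1 - a_next * a_next) term implies; the parameter names are taken from the snippet:

import numpy as np

rng = np.random.default_rng(0)
N, D, H = 4, 3, 5
x, a_prev = rng.standard_normal((N, D)), rng.standard_normal((N, H))
Wxa, Waa, b = rng.standard_normal((D, H)), rng.standard_normal((H, H)), rng.standard_normal(H)

def step(x):
    return np.tanh(x @ Wxa + a_prev @ Waa + b)

da_next = rng.standard_normal((N, H))
a_next = step(x)
dx = (da_next * (1 - a_next * a_next)) @ Wxa.T   # same formula as rnn_step_backward

# Numerical gradient of sum(a_next * da_next) w.r.t. one entry of x.
eps = 1e-6
i, j = 1, 2
x_p, x_m = x.copy(), x.copy()
x_p[i, j] += eps
x_m[i, j] -= eps
num = (np.sum(step(x_p) * da_next) - np.sum(step(x_m) * da_next)) / (2 * eps)
print(np.isclose(num, dx[i, j]))  # True, up to floating-point tolerance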
Example #4
def twoDims_batchnormal_backward(self, pre_grad):
    # Step-by-step batch-norm backward for 2-D inputs of shape (m, nx).
    xhat, xmu, ivar, sqrtvar = self.caches
    del self.caches
    m, nx = pre_grad.shape
    # Gradients of the learnable shift and scale.
    self.gradients['beta'] = p.sum(pre_grad, axis=0)
    dgammax = pre_grad
    self.gradients['gamma'] = p.sum(xhat * dgammax, axis=0)
    # Back through the normalisation xhat = (x - mean) * ivar.
    dxhat = self.parameters['gamma'] * dgammax
    divar = p.sum(xmu * dxhat, axis=0)
    dxmu1 = dxhat * ivar
    dsqrtvar = -1. / (sqrtvar ** 2) * divar
    dvar = 0.5 * ivar * dsqrtvar
    dsq = p.divide(1., m) * p.ones_like(pre_grad) * dvar
    dxmu2 = 2 * dsq * xmu
    dx1 = dxmu1 + dxmu2
    # Back through the batch mean.
    dmu = -1 * p.sum(dx1, axis=0)
    dx2 = p.divide(1., m) * p.ones_like(pre_grad) * dmu
    dx = dx1 + dx2
    return dx
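
For reference, a hedged sketch of the forward pass that would populate these caches, assuming xhat = (x - mean) / sqrt(var + eps) with per-feature statistics over the batch axis; eps and the shapes are illustrative assumptions:

import numpy as np

eps = 1e-5                           # assumed small constant
x = np.random.randn(8, 4)            # (m, nx) batch of activations
mu = x.mean(axis=0)
xmu = x - mu
var = (xmu ** 2).mean(axis=0)
sqrtvar = np.sqrt(var + eps)
ivar = 1.0 / sqrtvar
xhat = xmu * ivar
# out = gamma * xhat + beta, caching (xhat, xmu, ivar, sqrtvar) for the backward pass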
Example #5
def forward(self, targets, outputs):
    # Categorical cross-entropy on softmax probabilities, clipped for numerical stability.
    p.clip(outputs, self.epsilon, 1.0 - self.epsilon, out=outputs)

    # Plain classification: integer targets of shape (N,), 2-D probabilities.
    if targets.ndim == 1 and outputs.ndim == 2:
        N, T = outputs.shape
        loss = -p.sum(p.log(outputs[p.arange(N), targets])) / N
        return loss

    # Sequence case: flatten the time dimension before indexing.
    N, T = targets.shape
    if outputs.ndim == 3:
        N, T, D = outputs.shape
        outputs = outputs.reshape(N * T, D)
    if targets.ndim == 3:
        N, T, D = targets.shape
        targets = targets.reshape(N * T, D)

    loss = -p.sum(p.log(outputs[p.arange(N * T),
                                targets.reshape(N * T, )])) / N
    return loss
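
A small usage sketch of the sequence branch, assuming `outputs` holds softmax probabilities of shape (N, T, D) and `targets` holds integer class ids of shape (N, T); the data here is synthetic:

import numpy as np

rng = np.random.default_rng(0)
N, T, D = 2, 3, 4
logits = rng.standard_normal((N, T, D))
outputs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)  # softmax
targets = rng.integers(0, D, size=(N, T))

flat = outputs.reshape(N * T, D)
loss = -np.sum(np.log(flat[np.arange(N * T), targets.reshape(N * T)])) / N
print(loss)  # negative log-likelihood summed over time steps, averaged over the batch only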
Example #6
def backward(self, dout):
    # Convolution backward pass using the cached im2col matrix X_col.
    dZ = ac_get_grad(dout, self.Z, self.activation)   # back through the activation
    if self.batch_normal:
        dZ = self.batch_normal.backward(dZ)
    N = dZ.shape[0]
    self.gradients['b'] = 1. / N * p.sum(dZ, axis=(0, 2, 3))
    num_filters, _, filter_height, filter_width = self.parameters['W'].shape
    dout_reshaped = dZ.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    self.gradients['W'] = 1. / N * dout_reshaped.dot(self.X_col.T).reshape(self.parameters['W'].shape)
    # Input gradient in column form, folded back to image shape by col2im.
    dx_cols = self.parameters['W'].reshape(num_filters, -1).T.dot(dout_reshaped)
    if isinstance(dZ, numpy.ndarray):
        dx = col2im_indices_cpu(dx_cols, self.x.shape, filter_height, filter_width, self.padding, self.stride)
    else:
        dx = col2im_indices_gpu(dx_cols, self.x.shape, filter_height, filter_width, self.padding, self.stride)
    del self.x, self.X_col
    return dx
Example #7
def backward(self, dout):
    # Undo inverted dropout first, then back through the activation (and batch norm).
    if self.keep_prob < 1. and self.mode == 'train':
        dout = dout * self.drop_mask / self.keep_prob
    dout = ac_get_grad(dout, self.Z, self.activation)
    if self.batch_normal:
        dout = self.batch_normal.backward(dout)
    if self.x.ndim == 3:
        # Time-distributed input (N, T, D): flatten the time axis for the matmuls.
        N, T, D = self.x.shape
        dx = dout.reshape(N * T, self.unit_number).dot(self.parameters['W'].T).reshape(N, T, D)
        self.gradients['W'] = 1. / N * dout.reshape(N * T, self.unit_number).T.dot(self.x.reshape(N * T, D)).T
        self.gradients['b'] = 1. / N * dout.sum(axis=(0, 1))
    else:
        N, D = self.x.shape
        dx = p.dot(dout, self.parameters['W'].T)
        self.gradients['W'] = 1. / N * p.dot(self.x.T, dout)
        self.gradients['b'] = 1. / N * p.sum(dout, axis=0)
    if self.flatten:
        dx = dx.reshape(self.x_shape)  # restore the original input shape (for tensor inputs)
    return dx
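
A hedged sketch of the matching forward computation for the 3-D branch, assuming W has shape (D, unit_number), which the dout.dot(W.T) reshape above implies, and that the layer flattens the time axis before the matmul; shapes are illustrative:

import numpy as np

N, T, D, H = 2, 3, 4, 5              # H stands in for unit_number
x = np.random.randn(N, T, D)
W, b = np.random.randn(D, H), np.random.randn(H)
Z = (x.reshape(N * T, D) @ W + b).reshape(N, T, H)  # pre-activation, shape (N, T, H)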