def bprop(self, y_grad, h_grad):
        """Backpropagate through the step cached by the preceding ``fprop``.

        The incoming output gradient ``y_grad`` and hidden-state gradient
        ``h_grad`` are summed, pushed back through the three gate
        activations (``act_c``, ``act_u``, ``act_r``) and the two weight
        matrices. Parameter gradients are written into
        ``b_c``/``b_u``/``b_r``/``w_x``/``w_h`` ``.grad_array`` via ``out=``.

        NOTE(review): the r/u/c naming suggests reset gate, update gate and
        candidate state of a GRU-style unit -- confirm against the class.

        Returns a dict with ``"x_grad"`` (gradient w.r.t. the input) and
        ``"h_grad"`` (gradient w.r.t. the previous hidden state, clipped
        element-wise to ``[-self.clip, self.clip]``).
        """
        n = self.n_hidden
        rows_r = slice(None, n)
        rows_u = slice(n, n * 2)
        rows_c = slice(n * 2, n * 3)

        # Fresh array: the caller's h_grad is not modified by the in-place
        # operations further down.
        total_grad = h_grad + y_grad

        # h_tp1 = (1 - u) * h + u * c, differentiated w.r.t. c, u and h.
        cand_grad = total_grad * self._tmp_u
        update_grad = total_grad * (self._tmp_c - self._tmp_h_tm1)
        total_grad *= 1 - self._tmp_u

        # The activations were evaluated in the transposed layout in fprop.
        cand_grad = ca.ascontiguousarray(ca.transpose(cand_grad))
        update_grad = ca.ascontiguousarray(ca.transpose(update_grad))

        cand_grad = self.act_c.bprop(cand_grad)
        ca.sum(cand_grad, axis=1, keepdims=True, out=self.b_c.grad_array)

        update_grad = self.act_u.bprop(update_grad)
        ca.sum(update_grad, axis=1, keepdims=True, out=self.b_u.grad_array)

        reset_grad = self.act_r.bprop(cand_grad * self._tmp_h_c)
        ca.sum(reset_grad, axis=1, keepdims=True, out=self.b_r.grad_array)

        # Stack the three gate gradients in the same row order used by fprop.
        gates_grad = ca.empty((n * 3, y_grad.shape[0]))
        gates_grad[rows_r, :] = reset_grad
        gates_grad[rows_u, :] = update_grad
        gates_grad[rows_c, :] = cand_grad

        gates_grad_t = gates_grad.T
        ca.dot(self._tmp_x.T, gates_grad_t, out=self.w_x.grad_array)
        x_grad = ca.dot(gates_grad_t, self.w_x.array.T)

        # On the hidden path the candidate rows were multiplied by r in fprop.
        gates_grad[rows_c, :] *= self._tmp_r
        ca.dot(self._tmp_h_tm1.T, gates_grad.T, out=self.w_h.grad_array)
        total_grad += ca.dot(gates_grad.T, self.w_h.array.T)

        ca.clip(total_grad, -self.clip, self.clip, out=total_grad)
        return {"x_grad": x_grad, "h_grad": total_grad}
    def fprop(self, x, h):
        """Compute one recurrent step from input ``x`` and hidden state ``h``.

        ``x`` and ``h`` are projected through ``w_x`` and ``w_h``; each
        projection is split into three row blocks of ``n_hidden`` rows that
        feed the ``act_r``, ``act_u`` and ``act_c`` activations. The new
        hidden state is ``(1 - u) * h + u * c``.

        NOTE(review): r/u/c look like the reset gate, update gate and
        candidate state of a GRU-style unit -- confirm against the class.

        The inputs and intermediate values are cached on ``self`` for
        ``bprop``. Returns a dict whose ``"y"`` and ``"h"`` entries are the
        same new hidden-state array.
        """
        self._tmp_x = x
        self._tmp_h_tm1 = h

        n = self.n_hidden
        from_x = ca.dot(self.w_x.array.T, x.T)
        from_h = ca.dot(self.w_h.array.T, h.T)

        rows_r = slice(None, n)
        rows_u = slice(n, n * 2)
        rows_c = slice(n * 2, n * 3)
        hidden_c = from_h[rows_c, :]

        reset = self.act_r.fprop(
            from_x[rows_r, :] + from_h[rows_r, :] + self.b_r.array
        )
        update = self.act_u.fprop(
            from_x[rows_u, :] + from_h[rows_u, :] + self.b_u.array
        )
        cand = self.act_c.fprop(
            from_x[rows_c, :] + reset * hidden_c + self.b_c.array
        )

        # Transpose back so u and c line up with h for the element-wise mix.
        update = ca.ascontiguousarray(ca.transpose(update))
        cand = ca.ascontiguousarray(ca.transpose(cand))

        new_h = 1 - update
        new_h *= h
        new_h += update * cand

        # Cache for bprop (r and h_c stay in the transposed layout).
        self._tmp_r = reset
        self._tmp_u = update
        self._tmp_c = cand
        self._tmp_h_c = hidden_c
        return {"y": new_h, "h": new_h}
    def bprop(self, y_grad, h_grad):
        """Backward pass for the recurrent step cached by ``fprop``.

        Sums ``y_grad`` and ``h_grad``, propagates the result through the
        ``act_c``, ``act_u`` and ``act_r`` activations, fills the
        ``grad_array`` of ``b_c``, ``b_u``, ``b_r``, ``w_x`` and ``w_h``,
        and returns ``{'x_grad': ..., 'h_grad': ...}``. The returned hidden
        gradient is clipped to ``[-self.clip, self.clip]``.

        NOTE(review): r/u/c presumably stand for reset gate, update gate and
        candidate state -- verify against the enclosing class.
        """
        n = self.n_hidden
        u = self._tmp_u

        # New array, so the in-place updates below never touch the arguments.
        dh = h_grad + y_grad

        # Gradients of h_tp1 = (1 - u) * h + u * c.
        dc = dh * u
        du = dh * (self._tmp_c - self._tmp_h_tm1)
        dh *= (1 - u)

        # Switch to the transposed layout the activations ran in.
        dc = ca.ascontiguousarray(ca.transpose(dc))
        du = ca.ascontiguousarray(ca.transpose(du))

        dc = self.act_c.bprop(dc)
        ca.sum(dc, axis=1, keepdims=True, out=self.b_c.grad_array)

        du = self.act_u.bprop(du)
        ca.sum(du, axis=1, keepdims=True, out=self.b_u.grad_array)

        dr = self.act_r.bprop(dc * self._tmp_h_c)
        ca.sum(dr, axis=1, keepdims=True, out=self.b_r.grad_array)

        # Row blocks in the same order as the stacked projections in fprop.
        d_stack = ca.empty((n*3, y_grad.shape[0]))
        for block_idx, block_grad in enumerate((dr, du, dc)):
            d_stack[block_idx*n:(block_idx + 1)*n, :] = block_grad

        ca.dot(self._tmp_x.T, d_stack.T, out=self.w_x.grad_array)
        x_grad = ca.dot(d_stack.T, self.w_x.array.T)

        # The hidden projection of the candidate was scaled by r in fprop.
        d_stack[n*2:n*3, :] *= self._tmp_r
        ca.dot(self._tmp_h_tm1.T, d_stack.T, out=self.w_h.grad_array)
        dh += ca.dot(d_stack.T, self.w_h.array.T)

        ca.clip(dh, -self.clip, self.clip, out=dh)
        return {'x_grad': x_grad, 'h_grad': dh}
    def fprop(self, x, h):
        """Forward pass for one recurrent step.

        Projects ``x`` through ``w_x`` and ``h`` through ``w_h``, splits
        each projection into three blocks of ``n_hidden`` rows, applies
        ``act_r``, ``act_u`` and ``act_c``, and blends the previous state
        with the candidate as ``(1 - u) * h + u * c``.

        NOTE(review): r/u/c presumably stand for reset gate, update gate and
        candidate state -- verify against the enclosing class.

        Caches ``x``, ``h``, ``r``, ``u``, ``c`` and the candidate's hidden
        projection on ``self`` for ``bprop``. Returns a dict in which both
        ``'y'`` and ``'h'`` refer to the new hidden state.
        """
        self._tmp_x = x
        self._tmp_h_tm1 = h

        x_proj = ca.dot(self.w_x.array.T, x.T)
        h_proj = ca.dot(self.w_h.array.T, h.T)

        n = self.n_hidden
        # Each projection stacks the r, u and c blocks row-wise.
        x_r, x_u, x_c = (x_proj[k*n:(k + 1)*n, :] for k in range(3))
        h_r, h_u, h_c = (h_proj[k*n:(k + 1)*n, :] for k in range(3))

        r = self.act_r.fprop(x_r + h_r + self.b_r.array)
        u = self.act_u.fprop(x_u + h_u + self.b_u.array)
        c = self.act_c.fprop(x_c + r*h_c + self.b_c.array)

        # Bring u and c back to the layout of h before mixing.
        u = ca.ascontiguousarray(ca.transpose(u))
        c = ca.ascontiguousarray(ca.transpose(c))

        h_next = 1 - u
        h_next *= h
        h_next += u*c

        self._tmp_r, self._tmp_u = r, u
        self._tmp_c, self._tmp_h_c = c, h_c
        return {'y': h_next, 'h': h_next}
Example #5
0
def test_transpose():
    """Check that a contiguous transpose in ``ca`` matches NumPy's.

    For each shape, an array of consecutive integers starting at 1 is
    transposed and made contiguous with both NumPy and ``ca``. The
    comparison result is printed (as before) and now also asserted, so a
    mismatch fails the test instead of only showing ``False`` in the output.
    """
    shapes = [(4, 4), (5, 4), (8, 8), (32, 32), (55, 44), (64, 55), (55, 64),
              (32, 64), (64, 128), (128, 64), (128, 1)]
    for shape in shapes:
        a_np = np.reshape(np.arange(np.prod(shape)), shape) + 1
        a_ca = ca.array(a_np)
        a_np = np.ascontiguousarray(a_np.T)
        a_ca = ca.ascontiguousarray(a_ca.T)
        matches = np.allclose(a_np, np.array(a_ca))
        print(matches)
        assert matches, "transpose mismatch for shape %s" % (shape,)
Example #6
0
def test_transpose():
    """Compare ``ca``'s contiguous transpose with NumPy's over many shapes.

    Each input is ``1..prod(shape)`` reshaped to ``shape``. The result of
    the comparison is still printed, and is additionally asserted so that
    a wrong transpose makes the test fail rather than pass silently.
    """
    shapes = [(4, 4), (5, 4), (8, 8), (32, 32), (55, 44), (64, 55), (55, 64),
              (32, 64), (64, 128), (128, 64), (128, 1)]
    for shape in shapes:
        a_np = np.reshape(np.arange(np.prod(shape)), shape)+1
        a_ca = ca.array(a_np)
        a_np = np.ascontiguousarray(a_np.T)
        a_ca = ca.ascontiguousarray(a_ca.T)
        is_equal = np.allclose(a_np, np.array(a_ca))
        print(is_equal)
        assert is_equal, 'transpose mismatch for shape %s' % (shape,)
Example #7
0
 def bprop(self):
     """Propagate the output gradient to the input by transposing it.

     Sets ``self.x.out_grad`` to the transpose of ``self.out_grad``. When
     ``self.contiguous`` is true, the stored gradient is a contiguous copy.
     """
     x_grad = ca.transpose(self.out_grad)
     if self.contiguous:
         # The contiguous copy belongs on the input gradient. Writing it to
         # self.out would overwrite the forward output with gradient data
         # and leave x.out_grad non-contiguous.
         x_grad = ca.ascontiguousarray(x_grad)
     self.x.out_grad = x_grad
Example #8
0
 def fprop(self):
     """Set ``self.out`` to the transpose of the input's output.

     When ``self.contiguous`` is true, the result is passed through
     ``ca.ascontiguousarray`` before being kept.
     """
     transposed = ca.transpose(self.x.out)
     self.out = transposed
     if not self.contiguous:
         return
     self.out = ca.ascontiguousarray(transposed)
Example #9
0
 def bprop(self):
     """Propagate the gradient to the input by transposing it.

     Sets ``self.x.grad_array`` to the transpose of ``self.grad_array``.
     When ``self.contiguous`` is true, the stored gradient is a contiguous
     copy.
     """
     x_grad = ca.transpose(self.grad_array)
     if self.contiguous:
         # Keep the contiguous copy on the input gradient. Assigning it to
         # self.array would replace the forward result with gradient data
         # and leave x.grad_array non-contiguous.
         x_grad = ca.ascontiguousarray(x_grad)
     self.x.grad_array = x_grad
Example #10
0
 def bprop(self):
     """Send the transposed output gradient back to the input node.

     ``self.x.out_grad`` receives the transpose of ``self.out_grad``, made
     contiguous first when ``self.contiguous`` is true.
     """
     grad = ca.transpose(self.out_grad)
     if self.contiguous:
         # The contiguous gradient must go to x.out_grad. Storing it in
         # self.out would clobber the forward output and leave the input
         # gradient non-contiguous.
         grad = ca.ascontiguousarray(grad)
     self.x.out_grad = grad
Example #11
0
 def fprop(self):
     """Store the transposed input in ``self.out``.

     If ``self.contiguous`` is set, ``self.out`` is then replaced by the
     result of ``ca.ascontiguousarray`` on it.
     """
     self.out = ca.transpose(self.x.out)
     make_contiguous = self.contiguous
     if make_contiguous:
         out_t = self.out
         self.out = ca.ascontiguousarray(out_t)