def bprop(self, y_grad, h_grad):
    """Run one backward step and return the input and hidden gradients.

    The cached names (``_tmp_u``, ``_tmp_r``, ``_tmp_c``) suggest a
    GRU-style unit with update, reset and candidate parts -- TODO confirm
    against the matching forward pass.

    Args:
        y_grad: incoming gradient; it is added to ``h_grad`` and its first
            dimension sizes the second axis of the stacked gradient buffer.
        h_grad: incoming gradient that is summed with ``y_grad``
            (presumably the gradient flowing through the hidden state).

    Returns:
        A dict with ``"x_grad"`` and ``"h_grad"``; the latter is clipped to
        ``[-self.clip, self.clip]``.

    Side effects:
        Writes into ``grad_array`` of ``b_c``, ``b_u``, ``b_r``, ``w_x``
        and ``w_h``.
    """
    hidden = self.n_hidden
    r_rows = slice(None, hidden)
    u_rows = slice(hidden, hidden * 2)
    c_rows = slice(hidden * 2, hidden * 3)

    # A fresh array, so the in-place updates below never touch the caller's
    # h_grad.
    total_grad = h_grad + y_grad

    cand_grad = total_grad * self._tmp_u
    upd_grad = total_grad * (self._tmp_c - self._tmp_h_tm1)
    # Must happen after the two products above, which need the unscaled sum.
    total_grad *= 1 - self._tmp_u

    # Switch to the transposed layout (made contiguous) for what follows.
    cand_grad = ca.ascontiguousarray(ca.transpose(cand_grad))
    upd_grad = ca.ascontiguousarray(ca.transpose(upd_grad))

    cand_grad = self.act_c.bprop(cand_grad)
    ca.sum(cand_grad, axis=1, keepdims=True, out=self.b_c.grad_array)

    upd_grad = self.act_u.bprop(upd_grad)
    ca.sum(upd_grad, axis=1, keepdims=True, out=self.b_u.grad_array)

    reset_grad = self.act_r.bprop(cand_grad * self._tmp_h_c)
    ca.sum(reset_grad, axis=1, keepdims=True, out=self.b_r.grad_array)

    # One buffer holding the three gradients stacked along the first axis,
    # in the order r, u, c.
    stacked = ca.empty((hidden * 3, y_grad.shape[0]))
    stacked[r_rows, :] = reset_grad
    stacked[u_rows, :] = upd_grad
    stacked[c_rows, :] = cand_grad

    ca.dot(self._tmp_x.T, stacked.T, out=self.w_x.grad_array)
    x_grad = ca.dot(stacked.T, self.w_x.array.T)

    # The c rows are rescaled by _tmp_r only for the w_h products, so this
    # must stay after the w_x products above.
    stacked[c_rows, :] *= self._tmp_r
    ca.dot(self._tmp_h_tm1.T, stacked.T, out=self.w_h.grad_array)
    total_grad += ca.dot(stacked.T, self.w_h.array.T)

    ca.clip(total_grad, -self.clip, self.clip, out=total_grad)
    return {"x_grad": x_grad, "h_grad": total_grad}
Example #2
0
 def fprop(self):
     """Compute the summed binary cross-entropy and store it in self.out.

     Reads the prediction from ``self.x.out`` and the target from
     ``self.target.out``. When ``self.clip`` is truthy the prediction is
     first clipped to ``[_FLT_MIN, .9999999]``; the prediction array is
     also passed as the output argument, so ``self.x.out`` itself is
     modified.
     """
     probs = self.x.out
     labels = self.target.out
     if self.clip:
         # Keeps both log() arguments below strictly positive.
         ca.clip(probs, _FLT_MIN, .9999999, probs)
     pos_term = labels * ca.log(probs)
     neg_term = (1 - labels) * ca.log(1 - probs)
     self.out = -ca.sum(pos_term + neg_term)
    def bprop(self, y_grad, h_grad):
        """Run one backward step and return the input and hidden gradients.

        The cached names (``_tmp_u``, ``_tmp_r``, ``_tmp_c``) suggest a
        GRU-style unit with update, reset and candidate parts -- TODO
        confirm against the matching forward pass.

        Args:
            y_grad: incoming gradient; it is added to ``h_grad`` and its
                first dimension sizes the second axis of the stacked
                gradient buffer.
            h_grad: incoming gradient that is summed with ``y_grad``
                (presumably the gradient flowing through the hidden state).

        Returns:
            A dict with ``'x_grad'`` and ``'h_grad'``; the latter is clipped
            to ``[-self.clip, self.clip]``.

        Side effects:
            Writes into ``grad_array`` of ``b_c``, ``b_u``, ``b_r``,
            ``w_x`` and ``w_h``.
        """
        hidden = self.n_hidden
        r_rows = slice(None, hidden)
        u_rows = slice(hidden, hidden*2)
        c_rows = slice(hidden*2, hidden*3)

        # A fresh array, so the in-place updates below never touch the
        # caller's h_grad.
        total_grad = h_grad + y_grad

        cand_grad = total_grad * self._tmp_u
        upd_grad = total_grad * (self._tmp_c - self._tmp_h_tm1)
        # Must happen after the two products above, which need the
        # unscaled sum.
        total_grad *= (1 - self._tmp_u)

        # Switch to the transposed layout (made contiguous) for what follows.
        cand_grad = ca.ascontiguousarray(ca.transpose(cand_grad))
        upd_grad = ca.ascontiguousarray(ca.transpose(upd_grad))

        cand_grad = self.act_c.bprop(cand_grad)
        ca.sum(cand_grad, axis=1, keepdims=True, out=self.b_c.grad_array)

        upd_grad = self.act_u.bprop(upd_grad)
        ca.sum(upd_grad, axis=1, keepdims=True, out=self.b_u.grad_array)

        reset_grad = self.act_r.bprop(cand_grad * self._tmp_h_c)
        ca.sum(reset_grad, axis=1, keepdims=True, out=self.b_r.grad_array)

        # One buffer holding the three gradients stacked along the first
        # axis, in the order r, u, c.
        stacked = ca.empty((hidden*3, y_grad.shape[0]))
        stacked[r_rows, :] = reset_grad
        stacked[u_rows, :] = upd_grad
        stacked[c_rows, :] = cand_grad

        ca.dot(self._tmp_x.T, stacked.T, out=self.w_x.grad_array)
        x_grad = ca.dot(stacked.T, self.w_x.array.T)

        # The c rows are rescaled by _tmp_r only for the w_h products, so
        # this must stay after the w_x products above.
        stacked[c_rows, :] *= self._tmp_r
        ca.dot(self._tmp_h_tm1.T, stacked.T, out=self.w_h.grad_array)
        total_grad += ca.dot(stacked.T, self.w_h.array.T)

        ca.clip(total_grad, -self.clip, self.clip, out=total_grad)
        return {'x_grad': x_grad, 'h_grad': total_grad}
Example #4
0
def categorical_cross_entropy(y_pred, y_true, eps=1e-15):
    """Return the cross-entropy of ``y_pred`` against ``y_true``.

    ``y_true`` is assumed to be one-hot encoded. Predictions are clipped to
    ``[eps, 1 - eps]`` and then divided by their sum along axis 1 before the
    logarithm is taken. The loss is summed over axis 1, giving one value
    per remaining index. ``y_pred`` itself is left untouched: the in-place
    division acts on the clipped copy.
    """
    clipped = ca.clip(y_pred, eps, 1 - eps)
    # XXX: do we need to normalize?
    axis_totals = ca.sum(clipped, axis=1, keepdims=True)
    clipped /= axis_totals
    weighted_log = y_true * ca.log(clipped)
    return -ca.sum(weighted_log, axis=1)
Example #5
0
def categorical_cross_entropy(y_pred, y_true, eps=1e-15):
    """Compute the categorical cross-entropy, reduced over axis 1.

    Expects one-hot ``y_true``. A clipped copy of ``y_pred`` (limited to
    ``[eps, 1 - eps]``) is renormalised so that it sums to one along axis 1,
    and the negative sum of ``y_true * log(copy)`` over that axis is
    returned. The caller's ``y_pred`` is not modified.
    """
    upper = 1 - eps
    probs = ca.clip(y_pred, eps, upper)
    # XXX: do we need to normalize?
    probs /= ca.sum(probs, axis=1, keepdims=True)
    log_probs = ca.log(probs)
    return -ca.sum(y_true * log_probs, axis=1)
Example #6
0
 def fprop(self):
     """Clip ``self.x.out`` to ``[self.a_min, self.a_max]`` into ``self.out``."""
     lower, upper = self.a_min, self.a_max
     ca.clip(self.x.out, lower, upper, out=self.out)
Example #7
0
 def fprop(self):
     """Clip ``self.x.array`` to ``[self.a_min, self.a_max]`` into ``self.array``."""
     source = self.x.array
     # The result is written into the existing self.array buffer.
     ca.clip(source, self.a_min, self.a_max, out=self.array)
Example #8
0
 def fprop(self):
     """Write the input array, limited to ``[a_min, a_max]``, into ``self.array``."""
     lower, upper = self.a_min, self.a_max
     ca.clip(self.x.array, lower, upper, out=self.array)
Example #9
0
 def fprop(self):
     """Write the input, limited to ``[a_min, a_max]``, into ``self.out``."""
     source = self.x.out
     # The result is written into the existing self.out buffer.
     ca.clip(source, self.a_min, self.a_max, out=self.out)
Example #10
0
 def grad(self):
     """Store the binary cross-entropy gradient in ``self.x.out_grad``.

     The value is ``-(t / p - (1 - t) / (1 - p))`` with ``p = self.x.out``
     and ``t = self.target.out``. When ``self.clip`` is truthy, ``p`` is
     first clipped to ``[_FLT_MIN, .9999999]``; the prediction array is
     also passed as the output argument, so ``self.x.out`` itself is
     modified.
     """
     probs = self.x.out
     labels = self.target.out
     if self.clip:
         # Keeps both denominators below away from zero.
         ca.clip(probs, _FLT_MIN, .9999999, probs)
     pos_part = labels / probs
     neg_part = (1 - labels) / (1 - probs)
     self.x.out_grad = -(pos_part - neg_part)
Example #11
0
 def grad(self):
     """Set ``self.x.out_grad`` to the derivative of the binary cross-entropy.

     With prediction ``p`` (``self.x.out``) and target ``t``
     (``self.target.out``) the stored value is
     ``-(t/p - (1-t)/(1-p))``. If ``self.clip`` is truthy the prediction is
     clipped to ``[_FLT_MIN, .9999999]`` beforehand, using the prediction
     array as its own output buffer.
     """
     p = self.x.out
     t = self.target.out
     if self.clip:
         # In-place clip: guards the divisions by p and (1 - p).
         ca.clip(p, _FLT_MIN, .9999999, p)
     ratio_true = t/p
     ratio_false = (1-t)/(1-p)
     self.x.out_grad = -(ratio_true - ratio_false)
Example #12
0
 def fprop(self):
     """Evaluate the binary cross-entropy loss and assign it to ``self.out``.

     The loss is ``-sum(t*log(p) + (1 - t)*log(1 - p))`` with
     ``p = self.x.out`` and ``t = self.target.out``. If ``self.clip`` is
     truthy, ``p`` is clipped to ``[_FLT_MIN, .9999999]`` first, using the
     prediction array as its own output buffer.
     """
     p = self.x.out
     t = self.target.out
     if self.clip:
         # In-place clip: keeps log(p) and log(1 - p) finite.
         ca.clip(p, _FLT_MIN, .9999999, p)
     log_likelihood = t*ca.log(p) + (1 - t)*ca.log(1 - p)
     self.out = -ca.sum(log_likelihood)