Example #1
 def bprop(self):
     self.conv_op.bprop(
         self.out_grad, self.weights.array, self.x.out, filters_d=self.weights.grad_array, to_imgs=False
     )
     self.conv_op.fprop(self.out_grad, self.weights.array, convout=self.x.out_grad)
     if self.bias is not None:
         ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True), axis=0, keepdims=True, out=self.bias.grad_array)
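The bias gradient above is the output gradient reduced over the spatial axes (2, 3) and then over the batch axis 0. A minimal NumPy sketch of the same reduction, with hypothetical shapes, for reference:

import numpy as np

# Hypothetical output gradient: (batch, channels, height, width)
out_grad = np.random.normal(size=(8, 16, 5, 5))

# Spatial axes first, then the batch axis, keeping dims so the
# result broadcasts like a per-channel bias of shape (1, 16, 1, 1).
bias_grad = out_grad.sum(axis=(2, 3), keepdims=True).sum(axis=0, keepdims=True)
assert bias_grad.shape == (1, 16, 1, 1)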
Example #2
 def bprop(self):
     self.conv_op.bprop(
         self.x.out, self.weights.array, self.out_grad,
         filters_d=self.weights.grad_array, imgs_d=self.x.out_grad
     )
     ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True), axis=0,
            keepdims=True, out=self.bias.grad_array)
Example #3
 def fprop(self):
     # -target * log(pred)
     tmp1 = self.pred.array + self.eps
     ca.log(tmp1, tmp1)
     tmp1 *= self.target.array
     ca.sum(tmp1, axis=1, out=self.array)
     ca.negative(self.array, self.array)
Example #4
 def fprop(self):
     # c - multiplier*(pred - target)**2
     tmp = self.pred.array - self.target.array
     tmp **= 2.0
     tmp *= -self.multiplier
     tmp += self.c
     ca.sum(tmp, axis=self.axis, out=self.array)
Example #5
def test_sum():
    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)

    s_np = np.sum(a_np)
    s_ca = ca.sum(a_ca)
    print(np.allclose(s_np, np.array(s_ca)))

    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)

    s_np = np.sum(a_np, 0)
    s_ca = ca.sum(a_ca, 0)
    print(np.allclose(s_np, np.array(s_ca)))

    s_np = np.sum(a_np, 1)
    s_ca = ca.sum(a_ca, 1)
    print(np.allclose(s_np, np.array(s_ca)))

    a_np = np.random.normal(size=(5, 5, 10))
    a_ca = ca.array(a_np)

    s_np = np.sum(a_np, 0)
    s_ca = ca.sum(a_ca, 0)
    print(np.allclose(s_np, np.array(s_ca)))

    s_np = np.sum(a_np, 2)
    s_ca = ca.sum(a_ca, 2)
    print(np.allclose(s_np, np.array(s_ca)))
Example #6
def categorical_cross_entropy(y_pred, y_true, eps=1e-15):
    # Assumes one-hot encoding.
    y_pred = ca.clip(y_pred, eps, 1 - eps)
    # XXX: do we need to normalize?
    y_pred /= ca.sum(y_pred, axis=1, keepdims=True)
    loss = -ca.sum(y_true * ca.log(y_pred), axis=1)
    return loss
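For reference, a NumPy rendition of the same loss; the clipping and renormalization mirror the cudarray version above, and the inputs are made-up one-hot examples:

import numpy as np

def categorical_cross_entropy_np(y_pred, y_true, eps=1e-15):
    # Assumes one-hot encoded y_true.
    y_pred = np.clip(y_pred, eps, 1 - eps)
    y_pred = y_pred / np.sum(y_pred, axis=1, keepdims=True)
    return -np.sum(y_true * np.log(y_pred), axis=1)

y_pred = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
y_true = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
print(categorical_cross_entropy_np(y_pred, y_true))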
Example #7
 def fprop(self):
     # -target * log(pred)
     tmp1 = self.pred.array + self.eps
     ca.log(tmp1, tmp1)
     tmp1 *= self.target.array
     ca.sum(tmp1, axis=1, out=self.array)
     ca.negative(self.array, self.array)
Example #8
    def bprop(self):
        ca.multiply(self._tmp_batch_centered, self.out_grad, self.x.out_grad)
        tmp = ca.mean(self.x.out_grad, axis=0, keepdims=True)
        ca.multiply(self._tmp_batch_centered, tmp, self.x.out_grad)
        self.x.out_grad *= -1
        self.x.out_grad *= self._tmp_batch_inv_std
        self.x.out_grad *= self._tmp_batch_inv_std

        ca.mean(self.out_grad, axis=0, keepdims=True, out=tmp)
        self.x.out_grad += self.out_grad
        self.x.out_grad -= tmp
        self.x.out_grad *= self._tmp_batch_inv_std

        if self.affine:
            self.x.out_grad *= self.gamma.array
            # Normalized input
            self._tmp_batch_centered *= self._tmp_batch_inv_std
            self._tmp_batch_centered *= self.out_grad
            ca.sum(self._tmp_batch_centered,
                   axis=0,
                   keepdims=True,
                   out=self.gamma.grad_array)
            ca.sum(self.out_grad,
                   axis=0,
                   keepdims=True,
                   out=self.beta.grad_array)
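The sequence of in-place operations above implements the standard batch-normalization backward pass. As a compact reference, a NumPy sketch of the non-affine part, assuming fprop cached centered = x - mean and inv_std = 1/sqrt(var + eps):

import numpy as np

def batchnorm_bprop_np(out_grad, centered, inv_std):
    # dx = inv_std * (dy - mean(dy) - x_hat * mean(dy * x_hat)),
    # where x_hat = centered * inv_std is the normalized input.
    x_hat = centered * inv_std
    return inv_std * (out_grad
                      - np.mean(out_grad, axis=0, keepdims=True)
                      - x_hat * np.mean(out_grad * x_hat, axis=0, keepdims=True))

x = np.random.normal(size=(8, 4))
centered = x - x.mean(axis=0, keepdims=True)
inv_std = 1.0 / np.sqrt(x.var(axis=0, keepdims=True) + 1e-5)
dy = np.random.normal(size=(8, 4))
print(batchnorm_bprop_np(dy, centered, inv_std).shape)  # (8, 4)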
Example #9
def categorical_cross_entropy(y_pred, y_true, eps=1e-15):
    # Assumes one-hot encoding.
    y_pred = ca.clip(y_pred, eps, 1 - eps)
    # XXX: do we need to normalize?
    y_pred /= ca.sum(y_pred, axis=1, keepdims=True)
    loss = -ca.sum(y_true * ca.log(y_pred), axis=1)
    return loss
Example #10
def test_sum():
    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)

    s_np = np.sum(a_np)
    s_ca = ca.sum(a_ca)
    print(np.allclose(s_np, np.array(s_ca)))

    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)

    s_np = np.sum(a_np, 0)
    s_ca = ca.sum(a_ca, 0)
    print(np.allclose(s_np, np.array(s_ca)))

    s_np = np.sum(a_np, 1)
    s_ca = ca.sum(a_ca, 1)
    print(np.allclose(s_np, np.array(s_ca)))

    a_np = np.random.normal(size=(5, 5, 10))
    a_ca = ca.array(a_np)

    s_np = np.sum(a_np, 0)
    s_ca = ca.sum(a_ca, 0)
    print(np.allclose(s_np, np.array(s_ca)))

    s_np = np.sum(a_np, 2)
    s_ca = ca.sum(a_ca, 2)
    print(np.allclose(s_np, np.array(s_ca)))
Example #11
 def fprop(self):
     # c - multiplier*(pred - target)**2
     tmp = self.pred.array - self.target.array
     tmp **= 2.0
     tmp *= -self.multiplier
     tmp += self.c
     ca.sum(tmp, axis=self.axis, out=self.array)
Example #12
 def fprop(self):
     tmp1 = self.mu.array**2
     ca.negative(tmp1, tmp1)
     tmp1 += self.logvar.array
     tmp1 += 1
     tmp1 -= ca.exp(self.logvar.array)
     ca.sum(tmp1, axis=self.axis, out=self.array)
     self.array *= -0.5
Example #13
 def bprop(self):
     self.conv_op.bprop(
         self.x.array, self.weights.array, self.grad_array,
         filters_d=self.weights.grad_array, imgs_d=self.x.grad_array
     )
     if self.bias is not None:
         ca.sum(ca.sum(self.grad_array, axis=(2, 3), keepdims=True), axis=0,
                keepdims=True, out=self.bias.grad_array)
Example #14
 def encode_bprop(self, y_grad):
     y_grad = self.activation.bprop(y_grad)
     # Because the weight gradient has already been updated by
     # decode_bprop() we must add the contribution.
     w_grad = self.weights.grad_array
     w_grad += ca.dot(self._tmp_x.T, y_grad)
     ca.sum(y_grad, axis=0, out=self.bias.grad_array)
     return ca.dot(y_grad, self.weights.array.T)
Example #15
 def bprop(self, y_grad):
     _, x_grad = self.conv_op.bprop(
         self._tmp_x, self.weights.array, y_grad, to_imgs=self.bprop_to_x,
         filters_d=self.weights.grad_array
     )
     ca.sum(ca.sum(y_grad, axis=(2, 3), keepdims=True), axis=0,
            keepdims=True, out=self.bias.grad_array)
     return x_grad
Example #16
 def fprop(self):
     # e_i = exp(x_i - max(x))
     # y = e_i / sum(e)
     tmp1 = ca.amax(self.x.array, axis=1, keepdims=True)
     ca.subtract(self.x.array, tmp1, self.array)
     ca.exp(self.array, self.array)
     ca.sum(self.array, axis=1, keepdims=True, out=tmp1)
     self.array /= tmp1
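Subtracting the row-wise maximum before exponentiating leaves the result unchanged but avoids overflow. A NumPy sketch of the same computation:

import numpy as np

def softmax_np(x):
    # The subtracted max cancels in the normalization, so the output is
    # identical to exp(x)/sum(exp(x)) but never overflows.
    e = np.exp(x - np.amax(x, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)

x = np.array([[1.0, 2.0, 3.0], [1000.0, 1001.0, 1002.0]])
print(softmax_np(x))  # both rows sum to 1; no overflow on the large row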
Example #17
 def encode_bprop(self, y_grad):
     y_grad = self.activation.bprop(y_grad)
     # Because W's gradient has already been updated by decode_bprop() at
     # this point, we should add its contribution from the encode step.
     W_grad = self.W.grad_array
     W_grad += ca.dot(self._tmp_last_x.T, y_grad)
     ca.sum(y_grad, axis=0, out=self.b.grad_array)
     return ca.dot(y_grad, self.W.array.T)
Example #18
 def bprop(self, y_grad, to_x=True):
     _, x_grad = self.conv_op.bprop(
         self._tmp_last_x, self.W.array, y_grad, to_imgs=to_x,
         filters_d=self.W.grad_array
     )
     ca.sum(ca.sum(y_grad, axis=(2, 3), keepdims=True), axis=0,
            keepdims=True, out=self.b.grad_array)
     return x_grad
Example #19
 def encode_bprop(self, y_grad):
     y_grad = self.activation.bprop(y_grad)
     # Because the weight gradient has already been updated by
     # decode_bprop() we must add the contribution.
     w_grad = self.weights.grad_array
     w_grad += ca.dot(self._tmp_x.T, y_grad)
     ca.sum(y_grad, axis=0, out=self.bias.grad_array)
     return ca.dot(y_grad, self.weights.array.T)
Example #20
 def fprop(self):
     tmp1 = self.mu.array**2
     ca.negative(tmp1, tmp1)
     tmp1 += self.logvar.array
     tmp1 += 1
     tmp1 -= ca.exp(self.logvar.array)
     ca.sum(tmp1, axis=self.axis, out=self.array)
     self.array *= -0.5
Example #21
 def fprop(self):
     tmp1 = self.mu.out**2
     ca.negative(tmp1, tmp1)
     tmp1 += self.log_sigma.out
     tmp1 += 1
     tmp1 -= ca.exp(self.log_sigma.out)
     ca.sum(tmp1, axis=1, keepdims=True, out=self.out)
     self.out *= -0.5
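These fprop variants appear to compute the KL divergence of a diagonal Gaussian from a standard normal, 0.5 * sum(mu^2 + exp(logvar) - logvar - 1), as used in variational autoencoders. A NumPy sketch under that assumption:

import numpy as np

def kl_to_standard_normal(mu, logvar):
    # Assumes logvar holds log(sigma^2); zero when mu=0, sigma=1.
    return -0.5 * np.sum(1 + logvar - mu ** 2 - np.exp(logvar), axis=1)

print(kl_to_standard_normal(np.zeros((2, 3)), np.zeros((2, 3))))  # [0. 0.]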
Example #22
 def bprop(self):
     self.conv_op.bprop(self.x.out,
                        self.weights.array,
                        self.out_grad,
                        filters_d=self.weights.grad_array,
                        imgs_d=self.x.out_grad)
     ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True),
            axis=0,
            keepdims=True,
            out=self.bias.grad_array)
Example #23
 def bprop(self, y_grad):
     _, x_grad = self.conv_op.bprop(self.last_x,
                                    self.W.array,
                                    y_grad,
                                    filters_d=self.W.grad_array)
     ca.sum(ca.sum(y_grad, axis=(2, 3), keepdims=True),
            axis=0,
            keepdims=True,
            out=self.b.grad_array)
     return x_grad
Example #24
 def fprop(self):
     # -log(1 - pred)*(1 - target) - log(pred)*target
     tmp1 = 1 - self.pred.out
     tmp1 += self.eps
     ca.log(tmp1, tmp1)
     tmp2 = 1 - self.target.out
     ca.multiply(tmp1, tmp2, tmp1)
     ca.add(self.pred.out, self.eps, tmp2)
     ca.log(tmp2, tmp2)
     tmp2 *= self.target.out
     ca.add(tmp1, tmp2, tmp1)
     tmp1 *= -1
     ca.sum(tmp1, axis=1, keepdims=True, out=self.out)
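A NumPy rendition of the same binary cross-entropy, with the eps terms guarding the logarithms exactly as above; the inputs are made-up probabilities and 0/1 targets:

import numpy as np

def binary_cross_entropy_np(pred, target, eps=1e-12):
    # -log(1 - pred)*(1 - target) - log(pred)*target, summed over axis 1.
    return -np.sum((1 - target) * np.log(1 - pred + eps)
                   + target * np.log(pred + eps), axis=1, keepdims=True)

pred = np.array([[0.9, 0.1], [0.2, 0.8]])
target = np.array([[1.0, 0.0], [0.0, 1.0]])
print(binary_cross_entropy_np(pred, target))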
Example #25
 def fprop(self):
     # -log(1 - pred)*(1 - target) - log(pred)*target
     tmp1 = 1 - self.pred.array
     tmp1 += self.eps
     ca.log(tmp1, tmp1)
     tmp2 = 1 - self.target.array
     ca.multiply(tmp1, tmp2, tmp1)
     ca.add(self.pred.array, self.eps, tmp2)
     ca.log(tmp2, tmp2)
     tmp2 *= self.target.array
     ca.add(tmp1, tmp2, tmp1)
     tmp1 *= -1
     ca.sum(tmp1, axis=1, out=self.array)
Example #26
 def fprop(self):
     # -log(1 - pred)*(1 - target) - log(pred)*target
     tmp1 = 1 - self.pred.array
     tmp1 += self.eps
     ca.log(tmp1, tmp1)
     tmp2 = 1 - self.target.array
     ca.multiply(tmp1, tmp2, tmp1)
     ca.add(self.pred.array, self.eps, tmp2)
     ca.log(tmp2, tmp2)
     tmp2 *= self.target.array
     ca.add(tmp1, tmp2, tmp1)
     tmp1 *= -1
     ca.sum(tmp1, axis=1, out=self.array)
Example #27
    def bprop(self, y_grad, h_grad):
        ca.dot(self._tmp_h.T, y_grad, out=self.w_hy.grad_array)
        ca.sum(y_grad, axis=0, keepdims=True, out=self.b_y.grad_array)
        h_grad = h_grad + ca.dot(y_grad, self.w_hy.array.T)

        h_grad = self.activation.bprop(h_grad)
        ca.sum(h_grad, axis=0, keepdims=True, out=self.b_h.grad_array)
        ca.dot(self._tmp_h_tm1.T, h_grad, out=self.w_hh.grad_array)
        ca.dot(self._tmp_x.T, h_grad, out=self.w_xh.grad_array)

        x_grad = ca.dot(h_grad, self.w_xh.array.T)
        h_grad = ca.dot(h_grad, self.w_hh.array.T)

        return {'x_grad': x_grad, 'h_grad': h_grad}
Example #28
    def bprop(self, y_grad, h_grad):
        ca.dot(self._tmp_h.T, y_grad, out=self.w_hy.grad_array)
        ca.sum(y_grad, axis=0, keepdims=True, out=self.b_y.grad_array)
        h_grad = h_grad + ca.dot(y_grad, self.w_hy.array.T)

        h_grad = self.activation.bprop(h_grad)
        ca.sum(h_grad, axis=0, keepdims=True, out=self.b_h.grad_array)
        ca.dot(self._tmp_h_tm1.T, h_grad, out=self.w_hh.grad_array)
        ca.dot(self._tmp_x.T, h_grad, out=self.w_xh.grad_array)

        x_grad = ca.dot(h_grad, self.w_xh.array.T)
        h_grad = ca.dot(h_grad, self.w_hh.array.T)

        return {"x_grad": x_grad, "h_grad": h_grad}
Example #29
 def bprop(self):
     self.conv_op.bprop(self.out_grad,
                        self.weights.array,
                        self.x.out,
                        filters_d=self.weights.grad_array,
                        to_imgs=False)
     self.conv_op.fprop(self.out_grad,
                        self.weights.array,
                        convout=self.x.out_grad)
     if self.bias is not None:
         ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True),
                axis=0,
                keepdims=True,
                out=self.bias.grad_array)
Example #30
def matrix_factorization(R, P, Q, mask, steps=200000000, alpha=0.00005, beta=0.02):
    Q = ca.transpose(Q)
    for step in range(steps):
        E = ca.subtract(R, ca.multiply(ca.dot(P, Q), mask))

        rmse = ca.sqrt(ca.sum(ca.power(E, 2)) / ca.sum(mask))
        rmse = np.array(rmse)[0]

        print('step: %i RMSE: %f' % (step, rmse))
        if rmse < 0.65:
            break
        P = ca.add(ca.multiply(P, (1 - alpha * beta)),
                   ca.multiply(ca.dot(E, ca.transpose(Q)), 2 * alpha))
        Q = ca.add(ca.multiply(Q, (1 - alpha * beta)),
                   ca.multiply(ca.dot(ca.transpose(P), E), 2 * alpha))

    return P, Q
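The update rule is a regularized gradient step, P <- P*(1 - alpha*beta) + 2*alpha*E*Q^T, and likewise for Q. A small self-contained NumPy sketch of the same loop, with hypothetical shapes and a step count chosen just for illustration:

import numpy as np

rng = np.random.default_rng(0)
R = rng.random((4, 3))            # ratings
mask = np.ones_like(R)            # observed entries
P = rng.random((4, 2))            # user factors
Q = rng.random((2, 3))            # item factors (already transposed)
alpha, beta = 0.01, 0.02

for step in range(2000):
    E = R - np.dot(P, Q) * mask                       # masked residual
    P = P * (1 - alpha * beta) + 2 * alpha * np.dot(E, Q.T)
    Q = Q * (1 - alpha * beta) + 2 * alpha * np.dot(P.T, E)

print(np.sqrt(np.sum(E ** 2) / np.sum(mask)))         # RMSE on observed entries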
Example #31
 def fprop(self):
     pred = self.x.out
     target = self.target.out
     if self.clip:
         ca.clip(pred, _FLT_MIN, .9999999, pred)
     self.out = -ca.sum(target * ca.log(pred) +
                        (1 - target) * ca.log(1 - pred))
Example #32
 def fprop(self):
     tmp1 = self.mu.array**2
     ca.negative(tmp1, tmp1)
     tmp1 += self.log_sigma.array
     tmp1 += 1
     tmp1 -= ca.exp(self.log_sigma.array)
     self.array = ca.sum(tmp1)
     self.array *= -0.5
Example #33
 def fprop(self):
     tmp1 = self.mu.out**2
     ca.negative(tmp1, tmp1)
     tmp1 += self.log_sigma.out
     tmp1 += 1
     tmp1 -= ca.exp(self.log_sigma.out)
     self.out = ca.sum(tmp1)
     self.out *= -0.5
Example #34
 def fprop(self):
     tmp1 = self.mu.array**2
     ca.negative(tmp1, tmp1)
     tmp1 += self.log_sigma.array
     tmp1 += 1
     tmp1 -= ca.exp(self.log_sigma.array)
     self.array = ca.sum(tmp1)
     self.array *= -0.5
Example #35
 def fprop(self):
     tmp1 = self.mu.out**2
     ca.negative(tmp1, tmp1)
     tmp1 += self.log_sigma.out
     tmp1 += 1
     tmp1 -= ca.exp(self.log_sigma.out)
     self.out = ca.sum(tmp1)
     self.out *= -0.5
Example #36
 def func(x, *args):
     ca.random.seed(random_seed)
     p_idx = args[0]
     param_vals = layer.params()[p_idx].values
     param_vals *= 0
     param_vals += ca.array(np.reshape(x, param_vals.shape))
     out = layer.fprop(ca.array(x0), 'train')
     y = ca.sum(out)
     return np.array(y)
Example #37
 def func(x, *args):
     ca.random.seed(random_seed)
     p_idx = args[0]
     param_vals = layer._params[p_idx].array
     param_vals *= 0
     param_vals += ca.array(np.reshape(x, param_vals.shape))
     out = layer.fprop(ca.array(x0), 'train')
     y = ca.sum(out)
     return np.array(y)
Example #38
def normalize(matrix, gpuFlag=False):
    if gpuFlag:
        import cudarray as ca
        norm = ca.sqrt(ca.sum(ca.power(matrix, 2), 1, keepdims=True))
        matrix_n = matrix / norm
    else:
        norm = np.sqrt(np.sum(np.square(matrix), 1, keepdims=True))
        matrix_n = matrix / norm

    return matrix_n
Example #39
    def _update(self):
        # Forward propagation
        next_x = self.x.array
        x_feats = [None]*len(self.layers)
        x_grams = [None]*len(self.layers)
        for l, layer in enumerate(self.layers):
            next_x = layer.fprop(next_x)
            if self.subject_weights[l] > 0:
                x_feats[l] = next_x
            if self.style_weights[l] > 0:
                x_feats[l] = next_x
                x_grams[l] = gram_matrix(next_x)

        # Backward propagation
        grad = ca.zeros_like(next_x)
        loss = ca.zeros(1)
        for l, layer in reversed(list(enumerate(self.layers))):
            if self.subject_weights[l] > 0:
                diff = x_feats[l] - self.subject_feats[l]
                norm = ca.sum(ca.fabs(diff)) + 1e-8
                weight = float(self.subject_weights[l]) / norm
                grad += diff * weight
                loss += 0.5*weight*ca.sum(diff**2)
            if self.style_weights[l] > 0:
                diff = x_grams[l] - self.style_grams[l]
                n_channels = diff.shape[0]
                x_feat = ca.reshape(x_feats[l], (n_channels, -1))
                style_grad = ca.reshape(ca.dot(diff, x_feat), x_feats[l].shape)
                norm = ca.sum(ca.fabs(style_grad))
                weight = float(self.style_weights[l]) / norm
                style_grad *= weight
                grad += style_grad
                loss += 0.25*weight*ca.sum(diff**2)
            grad = layer.bprop(grad)

        if self.tv_weight > 0:
            x = ca.reshape(self.x.array, (3, 1) + grad.shape[2:])
            tv = self.tv_conv.fprop(x, self.tv_kernel)
            tv *= self.tv_weight
            grad -= ca.reshape(tv, grad.shape)

        ca.copyto(self.x.grad_array, grad)
        return loss
Example #40
def normalize(matrix, gpuFlag=False):
    if gpuFlag:
        import cudarray as ca
        norm = ca.sqrt(ca.sum(ca.power(matrix, 2), 1, keepdims=True))
        matrix_n = matrix / norm
    else:
        norm = np.sqrt(np.sum(np.square(matrix), 1, keepdims=True))
        matrix_n = matrix / norm

    return matrix_n
Example #41
    def bprop(self):
        ca.multiply(self._tmp_batch_centered, self.grad_array,
                    self.x.grad_array)
        tmp = ca.mean(ca.mean(self.x.grad_array, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        ca.multiply(self._tmp_batch_centered, tmp, self.x.grad_array)
        self.x.grad_array *= -1
        self.x.grad_array *= self._tmp_batch_inv_std
        self.x.grad_array *= self._tmp_batch_inv_std

        tmp = ca.mean(ca.mean(self.grad_array, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        self.x.grad_array += self.grad_array
        self.x.grad_array -= tmp
        self.x.grad_array *= self._tmp_batch_inv_std

        if self.affine:
            self.x.grad_array *= self.gamma.array
            # Normalized input
            self._tmp_batch_centered *= self._tmp_batch_inv_std
            self._tmp_batch_centered *= self.grad_array
            ca.sum(ca.sum(self._tmp_batch_centered, axis=(2, 3),
                          keepdims=True), axis=0, keepdims=True,
                   out=self.gamma.grad_array)
            ca.sum(ca.sum(self.grad_array, axis=(2, 3), keepdims=True), axis=0,
                   keepdims=True, out=self.beta.grad_array)
Example #42
    def bprop(self, y_grad, h_grad):
        n = self.n_hidden
        h_grad = h_grad + y_grad

        c_grad = h_grad * self._tmp_u
        u_grad = h_grad * (self._tmp_c - self._tmp_h_tm1)
        h_grad *= (1 - self._tmp_u)

        c_grad = ca.ascontiguousarray(ca.transpose(c_grad))
        u_grad = ca.ascontiguousarray(ca.transpose(u_grad))

        c_grad = self.act_c.bprop(c_grad)
        ca.sum(c_grad, axis=1, keepdims=True, out=self.b_c.grad_array)

        u_grad = self.act_u.bprop(u_grad)
        ca.sum(u_grad, axis=1, keepdims=True, out=self.b_u.grad_array)

        r_grad = c_grad * self._tmp_h_c
        r_grad = self.act_r.bprop(r_grad)
        ca.sum(r_grad, axis=1, keepdims=True, out=self.b_r.grad_array)

        stack_grad = ca.empty((self.n_hidden*3, y_grad.shape[0]))
        stack_grad[:n, :] = r_grad
        stack_grad[n:n*2, :] = u_grad
        stack_grad[n*2:n*3, :] = c_grad

        ca.dot(self._tmp_x.T, stack_grad.T, out=self.w_x.grad_array)
        x_grad = ca.dot(stack_grad.T, self.w_x.array.T)

        stack_grad[n*2:n*3, :] *= self._tmp_r
        ca.dot(self._tmp_h_tm1.T, stack_grad.T, out=self.w_h.grad_array)
        h_grad += ca.dot(stack_grad.T, self.w_h.array.T)

        ca.clip(h_grad, -self.clip, self.clip, out=h_grad)
        return {'x_grad': x_grad, 'h_grad': h_grad}
Example #43
    def bprop(self):
        ca.multiply(self._tmp_batch_centered, self.out_grad, self.x.out_grad)
        tmp = ca.mean(ca.mean(self.x.out_grad, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        ca.multiply(self._tmp_batch_centered, tmp, self.x.out_grad)
        self.x.out_grad *= -1
        self.x.out_grad *= self._tmp_batch_inv_std
        self.x.out_grad *= self._tmp_batch_inv_std

        tmp = ca.mean(ca.mean(self.out_grad, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        self.x.out_grad += self.out_grad
        self.x.out_grad -= tmp
        self.x.out_grad *= self._tmp_batch_inv_std

        if self.affine:
            self.x.out_grad *= self.gamma.array
            # Normalized input
            self._tmp_batch_centered *= self._tmp_batch_inv_std
            self._tmp_batch_centered *= self.out_grad
            ca.sum(ca.sum(self._tmp_batch_centered, axis=(2, 3),
                          keepdims=True), axis=0, keepdims=True,
                   out=self.gamma.grad_array)
            ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True), axis=0,
                   keepdims=True, out=self.beta.grad_array)
Example #44
    def bprop(self, y_grad, h_grad):
        n = self.n_hidden
        h_grad = h_grad + y_grad

        c_grad = h_grad * self._tmp_u
        u_grad = h_grad * (self._tmp_c - self._tmp_h_tm1)
        h_grad *= 1 - self._tmp_u

        c_grad = ca.ascontiguousarray(ca.transpose(c_grad))
        u_grad = ca.ascontiguousarray(ca.transpose(u_grad))

        c_grad = self.act_c.bprop(c_grad)
        ca.sum(c_grad, axis=1, keepdims=True, out=self.b_c.grad_array)

        u_grad = self.act_u.bprop(u_grad)
        ca.sum(u_grad, axis=1, keepdims=True, out=self.b_u.grad_array)

        r_grad = c_grad * self._tmp_h_c
        r_grad = self.act_r.bprop(r_grad)
        ca.sum(r_grad, axis=1, keepdims=True, out=self.b_r.grad_array)

        stack_grad = ca.empty((self.n_hidden * 3, y_grad.shape[0]))
        stack_grad[:n, :] = r_grad
        stack_grad[n : n * 2, :] = u_grad
        stack_grad[n * 2 : n * 3, :] = c_grad

        ca.dot(self._tmp_x.T, stack_grad.T, out=self.w_x.grad_array)
        x_grad = ca.dot(stack_grad.T, self.w_x.array.T)

        stack_grad[n * 2 : n * 3, :] *= self._tmp_r
        ca.dot(self._tmp_h_tm1.T, stack_grad.T, out=self.w_h.grad_array)
        h_grad += ca.dot(stack_grad.T, self.w_h.array.T)

        ca.clip(h_grad, -self.clip, self.clip, out=h_grad)
        return {"x_grad": x_grad, "h_grad": h_grad}
Example #45
    def _update(self):
        # Forward propagation
        next_x = self.x.array
        x_feats = [None] * len(self.layers)
        for l, layer in enumerate(self.layers):
            next_x = layer.fprop(next_x)
            if self.subject_weights[l] > 0 or self.style_weights[l] > 0:
                x_feats[l] = next_x

        # Backward propagation
        grad = ca.zeros_like(next_x)
        loss = ca.zeros(1)
        for l, layer in reversed(list(enumerate(self.layers))):
            if self.subject_weights[l] > 0:
                diff = x_feats[l] - self.subject_feats[l]
                norm = ca.sum(ca.fabs(diff)) + 1e-8
                weight = float(self.subject_weights[l]) / norm
                grad += diff * weight
                loss += 0.5 * weight * ca.sum(diff**2)
            if self.style_weights[l] > 0:
                diff = gram_matrix(x_feats[l]) - self.style_grams[l]
                n_channels = diff.shape[0]
                x_feat = ca.reshape(x_feats[l], (n_channels, -1))
                style_grad = ca.reshape(ca.dot(diff, x_feat), x_feats[l].shape)
                norm = ca.sum(ca.fabs(style_grad))
                weight = float(self.style_weights[l]) / norm
                style_grad *= weight
                grad += style_grad
                loss += 0.25 * weight * ca.sum(diff**2)
            grad = layer.bprop(grad)

        if self.tv_weight > 0:
            x = ca.reshape(self.x.array, (3, 1) + grad.shape[2:])
            tv = self.tv_conv.fprop(x, self.tv_kernel)
            tv *= self.tv_weight
            grad -= ca.reshape(tv, grad.shape)

        ca.copyto(self.x.grad_array, grad)
        return loss
Example #46
def test_reduce():
    a_np = np.random.normal(size=(1024, ))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np)
    c_ca = ca.sum(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.mean(a_np)
    c_ca = ca.mean(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np)
    c_ca = ca.sum(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=0)
    c_ca = ca.sum(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=1)
    c_ca = ca.sum(a_ca, axis=1)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.random.normal(size=(5, 7, 11))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np, axis=0)
    c_ca = ca.sum(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=2)
    c_ca = ca.sum(a_ca, axis=2)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=(0, 1))
    c_ca = ca.sum(a_ca, axis=(0, 1))
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=(1, 2))
    c_ca = ca.sum(a_ca, axis=(1, 2))
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.argmin(a_np, axis=0)
    c_ca = ca.argmin(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.argmin(a_np, axis=2)
    c_ca = ca.argmin(a_ca, axis=2)
    print(np.allclose(c_np, np.array(c_ca)))
Example #47
def test_reduce():
    a_np = np.random.normal(size=(1024,))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np)
    c_ca = ca.sum(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.mean(a_np)
    c_ca = ca.mean(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np)
    c_ca = ca.sum(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=0)
    c_ca = ca.sum(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=1)
    c_ca = ca.sum(a_ca, axis=1)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.random.normal(size=(5, 7, 11))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np, axis=0)
    c_ca = ca.sum(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=2)
    c_ca = ca.sum(a_ca, axis=2)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=(0, 1))
    c_ca = ca.sum(a_ca, axis=(0, 1))
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=(1, 2))
    c_ca = ca.sum(a_ca, axis=(1, 2))
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.argmin(a_np, axis=0)
    c_ca = ca.argmin(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.argmin(a_np, axis=2)
    c_ca = ca.argmin(a_ca, axis=2)
    print(np.allclose(c_np, np.array(c_ca)))
Example #48
    def bprop(self):
        ca.multiply(self._tmp_batch_centered, self.grad_array,
                    self.x.grad_array)
        tmp = ca.mean(self.x.grad_array, axis=0, keepdims=True)
        ca.multiply(self._tmp_batch_centered, tmp, self.x.grad_array)
        self.x.grad_array *= -1
        self.x.grad_array *= self._tmp_batch_inv_std
        self.x.grad_array *= self._tmp_batch_inv_std

        ca.mean(self.grad_array, axis=0, keepdims=True, out=tmp)
        self.x.grad_array += self.grad_array
        self.x.grad_array -= tmp
        self.x.grad_array *= self._tmp_batch_inv_std

        if self.affine:
            self.x.grad_array *= self.gamma.array
            # Normalized input
            self._tmp_batch_centered *= self._tmp_batch_inv_std
            self._tmp_batch_centered *= self.grad_array
            ca.sum(self._tmp_batch_centered, axis=0, keepdims=True,
                   out=self.gamma.grad_array)
            ca.sum(self.grad_array, axis=0, keepdims=True,
                   out=self.beta.grad_array)
Example #49
def matrix_factorization(R,
                         P,
                         Q,
                         mask,
                         steps=200000000,
                         alpha=0.00005,
                         beta=0.02):
    Q = ca.transpose(Q)
    for step in range(steps):
        E = ca.subtract(R, ca.multiply(ca.dot(P, Q), mask))

        rmse = ca.sqrt(ca.sum(ca.power(E, 2)) / ca.sum(mask))
        rmse = np.array(rmse)[0]

        print('step: %i RMSE: %f' % (step, rmse))
        if rmse < 0.65:
            break
        P = ca.add(ca.multiply(P, (1 - alpha * beta)),
                   ca.multiply(ca.dot(E, ca.transpose(Q)), 2 * alpha))
        Q = ca.add(ca.multiply(Q, (1 - alpha * beta)),
                   ca.multiply(ca.dot(ca.transpose(P), E), 2 * alpha))

    return P, Q
Example #50
 def bprop(self, y_grad):
     ca.dot(self._tmp_x.T, y_grad, out=self.weights.grad_array)
     ca.sum(y_grad, axis=0, out=self.bias.grad_array)
     if self.bprop_to_x:
         return ca.dot(y_grad, self.weights.array.T)
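The same pattern in NumPy, with hypothetical shapes: the weight gradient is x^T dot y_grad, the bias gradient is a column sum, and the input gradient uses the transposed weights:

import numpy as np

x = np.random.normal(size=(8, 4))        # (batch, n_in)
y_grad = np.random.normal(size=(8, 3))   # (batch, n_out)
weights = np.random.normal(size=(4, 3))  # (n_in, n_out)

w_grad = np.dot(x.T, y_grad)             # (n_in, n_out)
b_grad = np.sum(y_grad, axis=0)          # (n_out,)
x_grad = np.dot(y_grad, weights.T)       # (batch, n_in)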
Example #51
 def fprop(self, x1, x2):
     if self._tmp_x1 is not x1 or self._tmp_x2 is not x2:
         self._tmp_dists = ca.sum((x1-x2)**2, axis=1, keepdims=True)
         self._tmp_x1 = x1
         self._tmp_x2 = x2
     return self._tmp_dists
Example #52
 def bprop(self):
     ca.dot(self.x.out.T, self.out_grad, out=self.weights.grad_array)
     ca.dot(self.out_grad, self.weights.array.T, out=self.x.out_grad)
     if self.bias is not None:
         ca.sum(self.out_grad, axis=0, out=self.bias.grad_array)
Example #53
 def fprop(self):
     ca.sum(self.x.out, axis=self.axis, out=self.out,
            keepdims=self.keepdims)
Example #54
 def setup(self):
     self.out = ca.sum(self.x.out, axis=self.axis, keepdims=self.keepdims)
     self.out_shape = self.out.shape
     self.out = ca.empty(self.out_shape)
     self.out_grad = ca.empty(self.out_shape)
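setup() infers the output shape by performing the reduction once and then allocates fresh buffers of that shape. The same trick in NumPy:

import numpy as np

x = np.empty((5, 7, 11))
out_shape = np.sum(x, axis=(0, 2)).shape   # run the reduction once
out = np.empty(out_shape)                  # then allocate the real buffers
out_grad = np.empty(out_shape)
print(out_shape)  # (7,)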
Example #55
 def bprop(self, y_grad):
     ca.dot(self._last_x.T, y_grad, out=self.W.grad_array)
     ca.sum(y_grad, axis=0, out=self.b.grad_array)
     return ca.dot(y_grad, self.W.array.T)