def bprop(self):
    """Propagate gradients through the elementwise division lhs / rhs."""
    grad = self.grad_array
    if self.lhs.bpropable:
        # d(l/r)/dl = 1/r  =>  grad_l = grad / r
        ca.divide(grad, self.rhs.array, out=self.lhs.grad_array)
    if self.rhs.bpropable:
        # d(l/r)/dr = -l/r**2 = -(l/r)/r  =>  grad_r = -grad * out / r
        rgrad = self.rhs.grad_array
        ca.multiply(grad, self.array, out=rgrad)
        ca.divide(rgrad, self.rhs.array, out=rgrad)
        ca.negative(rgrad, out=rgrad)
def bprop(self):
    """Propagate gradients through the elementwise division lhs / rhs."""
    grad = self.out_grad
    if self.lhs_bprop:
        # d(l/r)/dl = 1/r  =>  grad_l = grad / r
        ca.divide(grad, self.rhs.out, out=self.lhs.out_grad)
    if self.rhs_bprop:
        # d(l/r)/dr = -l/r**2 = -(l/r)/r  =>  grad_r = -grad * out / r
        rgrad = self.rhs.out_grad
        ca.multiply(grad, self.out, out=rgrad)
        ca.divide(rgrad, self.rhs.out, out=rgrad)
        ca.negative(rgrad, out=rgrad)
def bprop(self):
    """Compute the loss gradient w.r.t. the predictions, in place.

    Evaluates -(target/pred - (1 - target)/(1 - pred)) — the derivative of a
    binary cross-entropy-style loss — with self.eps added to both
    denominators for numerical stability, then scales by the incoming
    gradient.  tmp1/tmp2 are scratch buffers reused as both input and
    output; the exact statement order is load-bearing.
    """
    # -(target/pred - (1 - target)/(1 - pred))
    tmp1 = 1 - self.target.out
    tmp2 = 1 - self.pred.out
    tmp2 += self.eps
    # tmp1 <- (1 - target) / (1 - pred + eps)
    ca.divide(tmp1, tmp2, tmp1)
    # tmp2 <- pred + eps (buffer reused for the other denominator)
    ca.add(self.pred.out, self.eps, tmp2)
    # tmp2 <- target / (pred + eps)
    ca.divide(self.target.out, tmp2, out=tmp2)
    # grad <- (1-target)/(1-pred+eps) - target/(pred+eps)
    ca.subtract(tmp1, tmp2, self.pred.out_grad)
    # Chain rule: scale by the gradient flowing in from above.
    self.pred.out_grad *= self.out_grad
def bprop(self):
    """Compute the loss gradient w.r.t. the predictions, in place.

    Evaluates -(target/pred - (1 - target)/(1 - pred)) — the derivative of a
    binary cross-entropy-style loss — with self.eps added to both
    denominators for numerical stability, then scales by the incoming
    gradient broadcast to self.bcast_shape.  tmp1/tmp2 are scratch buffers
    reused as both input and output; the exact statement order is
    load-bearing.
    """
    # -(target/pred - (1 - target)/(1 - pred))
    tmp1 = 1 - self.target.array
    tmp2 = 1 - self.pred.array
    tmp2 += self.eps
    # tmp1 <- (1 - target) / (1 - pred + eps)
    ca.divide(tmp1, tmp2, tmp1)
    # tmp2 <- pred + eps (buffer reused for the other denominator)
    ca.add(self.pred.array, self.eps, tmp2)
    # tmp2 <- target / (pred + eps)
    ca.divide(self.target.array, tmp2, out=tmp2)
    # grad <- (1-target)/(1-pred+eps) - target/(pred+eps)
    ca.subtract(tmp1, tmp2, self.pred.grad_array)
    # Chain rule: scale by the upstream gradient, reshaped so it
    # broadcasts against the prediction gradient.
    self.pred.grad_array *= ca.reshape(self.grad_array, self.bcast_shape)
def step(self, param, mean_square): grad = param.grad() # mean_square = decay*mean_square + (1 - decay)*grad mean_square *= self.decay tmp = grad**2 tmp *= (1 - self.decay) mean_square += tmp # step = -learn_rate*grad/(sqrt(mean_square) + eps) ca.sqrt(mean_square, tmp) tmp += self.eps ca.divide(grad, tmp, tmp) tmp *= -self.learn_rate param.step(tmp)
def fprop(self, x, phase):
    """Locally normalize x: subtract the local mean, divide by local std.

    phase is accepted for interface compatibility but not used.
    """
    channels = x.shape[1]

    def local_avg(a):
        # Convolve with the averaging kernel, then divide by the channel
        # count so the result is a mean over the local neighborhood.
        avg = self.conv_op.fprop(a, self.ca_kernel)
        if channels > 1:
            ca.divide(avg, channels, avg)
        return avg

    # Center the input with its local mean.
    centered = ca.subtract(x, local_avg(x))
    # Local standard deviation of the centered input.
    std = local_avg(ca.power(centered, 2))
    ca.sqrt(std, std)
    # Scale the centered input; eps guards against division by zero.
    return centered / (std + self.eps)
def fprop(self, x):
    """Locally normalize x: subtract the local mean, divide by local std."""
    channels = x.shape[1]

    def local_avg(a):
        # Convolve with the averaging kernel, then divide by the channel
        # count so the result is a mean over the local neighborhood.
        avg = self.conv_op.fprop(a, self.ca_kernel)
        if channels > 1:
            ca.divide(avg, channels, avg)
        return avg

    # Center the input with its local mean.
    centered = ca.subtract(x, local_avg(x))
    # Local standard deviation of the centered input.
    std = local_avg(ca.power(centered, 2))
    ca.sqrt(std, std)
    # Scale the centered input; eps guards against division by zero.
    return centered / (std + self.eps)
def step(self, param, state):
    """Apply one Adam-style update to param.

    state is the (m, v, t) triple: first-moment estimate, second-moment
    estimate, and step counter.  m and v are mutated in place so the
    caller's state persists across calls.

    param: object exposing grad() and step(delta) — presumably a learnable
        parameter wrapper; step() applies the delta.
    """
    m, v, t = state
    grad = param.grad()
    # In-place increment so the count persists inside the state tuple —
    # assumes t is a mutable array-like scalar; TODO confirm.
    t += 1
    # Rebind the local to a plain int for the scalar powers below.
    t = int(t)
    # m = beta1*m + (1 - beta1)*grad
    m *= self.beta1
    tmp = (1 - self.beta1)*grad
    m += tmp
    # v = beta2*v + (1 - beta2)*grad**2   (tmp reused as scratch)
    v *= self.beta2
    ca.power(grad, 2, tmp)
    tmp *= (1 - self.beta2)
    v += tmp
    # alpha = learn_rate*sqrt(1 - beta2**t)/(1 - beta1**t)
    # step = -alpha_t*m/(sqrt(v) + eps)
    # alpha folds in the bias correction for both moment estimates.
    alpha = self.learn_rate*np.sqrt(1 - self.beta2**t)/(1 - self.beta1**t)
    ca.sqrt(v, tmp)
    tmp += self.eps
    ca.divide(m, tmp, tmp)
    tmp *= -alpha
    param.step(tmp)
def test_binary():
    """Elementwise binary ops in ca must match their NumPy counterparts.

    Fix: the original merely print()-ed the np.allclose() results, so the
    test could never fail; every check is now an assert with a label.
    """
    a_np = np.random.normal(size=(5, 5))
    b_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.add(a_np, b_np)
    c_ca = ca.add(a_ca, b_ca)
    assert np.allclose(c_np, np.array(c_ca)), 'add'
    # In-place variants writing into the first operand.
    np.add(a_np, b_np, a_np)
    ca.add(a_ca, b_ca, a_ca)
    assert np.allclose(a_np, np.array(a_ca)), 'add in-place'
    np.multiply(a_np, b_np, a_np)
    ca.multiply(a_ca, b_ca, a_ca)
    assert np.allclose(a_np, np.array(a_ca)), 'multiply in-place'
    # Float array multiplied by a boolean array.
    a_np = np.random.normal(size=(5, 5))
    b_np = np.random.normal(size=(5, 5)) > 0
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    assert np.allclose(c_np, np.array(c_ca)), 'multiply by bool array'
    # Scalar broadcast against an array, for each operator.
    a_np = np.random.normal()
    b_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    assert np.allclose(c_np, np.array(c_ca)), 'multiply scalar'
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.divide(a_np, b_np)
    c_ca = ca.divide(a_ca, b_ca)
    assert np.allclose(c_np, np.array(c_ca)), 'divide scalar'
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.subtract(a_np, b_np)
    c_ca = ca.subtract(a_ca, b_ca)
    assert np.allclose(c_np, np.array(c_ca)), 'subtract scalar'
def bprop(self):
    """Chain the incoming gradient with 1/x.

    NOTE(review): 1/x is the derivative of log(x) — presumably this is the
    log op's backward pass; confirm against the class this belongs to.
    """
    gx = self.x.grad_array
    ca.divide(1.0, self.x.array, out=gx)
    ca.multiply(gx, self.grad_array, gx)
def bprop(self):
    """Chain the incoming gradient with 1/x.

    NOTE(review): 1/x is the derivative of log(x) — presumably this is the
    log op's backward pass; confirm against the class this belongs to.
    """
    gx = self.x.out_grad
    ca.divide(1.0, self.x.out, out=gx)
    ca.multiply(gx, self.out_grad, gx)
def fprop(self):
    """Forward pass: elementwise lhs / rhs, written into self.out."""
    numer = self.lhs.out
    denom = self.rhs.out
    ca.divide(numer, denom, out=self.out)
def bprop(self):
    """Chain the incoming gradient with 1/(1 + exp(-x)), built in place."""
    g = self.x.out_grad
    # Construct sigmoid(x) = 1/(1 + exp(-x)) directly in the gradient buffer.
    ca.negative(self.x.out, g)
    ca.exp(g, g)
    g += 1
    ca.divide(1.0, g, out=g)
    # Scale by the upstream gradient.
    g *= self.out_grad
def fprop(self):
    """Forward pass: elementwise lhs / rhs, written into self.array."""
    numer = self.lhs.array
    denom = self.rhs.array
    ca.divide(numer, denom, out=self.array)