def fprop(self):
    if self.phase == 'train':
        # Calculate batch mean
        tmp = ca.mean(self.x.out, axis=0, keepdims=True)
        # Center input
        ca.subtract(self.x.out, tmp, self._tmp_batch_centered)
        # Update running mean
        tmp *= 1 - self.momentum
        self.running_mean *= self.momentum
        self.running_mean += tmp
        # Calculate batch variance
        ca.power(self._tmp_batch_centered, 2, self.out)
        ca.mean(self.out, axis=0, keepdims=True,
                out=self._tmp_batch_inv_std)
        # Calculate 1 / sqrt(E[(x - E(x))^2] + eps)
        self._tmp_batch_inv_std += self.eps
        ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
        ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
        # Normalize input
        ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                    self.out)
        # Update running std (stored as the inverse std)
        self.running_std *= self.momentum
        ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
        self.running_std += tmp
    elif self.phase == 'test':
        ca.subtract(self.x.out, self.running_mean, self.out)
        self.out *= self.running_std
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
    if self.affine:
        self.out *= self.gamma.array
        self.out += self.beta.array
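For reference, a minimal NumPy sketch (hypothetical helper name, not part of cudarray) of the same train-time bookkeeping: the layer keeps exponential moving averages of the mean and of the inverse standard deviation 1 / sqrt(var + eps), which is why the test branch multiplies by running_std instead of dividing by it.

import numpy as np

def bn_train_step(x, running_mean, running_inv_std, momentum=0.9, eps=1e-5):
    # Batch statistics over the sample axis
    mean = x.mean(axis=0, keepdims=True)
    centered = x - mean
    inv_std = 1.0 / np.sqrt(centered.var(axis=0, keepdims=True) + eps)
    # Exponential moving averages, matching the fprop above
    running_mean = momentum * running_mean + (1 - momentum) * mean
    running_inv_std = momentum * running_inv_std + (1 - momentum) * inv_std
    return centered * inv_std, running_mean, running_inv_std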
def fprop(self):
    # e_i = exp(x_i - max(x))
    # y = e_i / sum(e)
    tmp1 = ca.amax(self.x.array, axis=1, keepdims=True)
    ca.subtract(self.x.array, tmp1, self.array)
    ca.exp(self.array, self.array)
    ca.sum(self.array, axis=1, keepdims=True, out=tmp1)
    self.array /= tmp1
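The max subtraction is the standard overflow guard: exp(x - max(x)) rescales numerator and denominator by the same factor, so the result is unchanged. A small NumPy sketch (illustrative function name) of the same trick:

import numpy as np

def softmax_rows(x):
    # Subtracting the row-wise max leaves the softmax unchanged because the
    # common factor exp(-max) cancels between numerator and denominator.
    e = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)

x = np.array([[1000.0, 1001.0], [0.0, 1.0]])
print(softmax_rows(x))  # finite values; a naive exp(1000.0) would overflow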
def bprop(self):
    # -(target/pred - (1 - target)/(1 - pred))
    tmp1 = 1 - self.target.out
    tmp2 = 1 - self.pred.out
    tmp2 += self.eps
    ca.divide(tmp1, tmp2, tmp1)
    ca.add(self.pred.out, self.eps, tmp2)
    ca.divide(self.target.out, tmp2, out=tmp2)
    ca.subtract(tmp1, tmp2, self.pred.out_grad)
    self.pred.out_grad *= self.out_grad
def bprop(self):
    # -(target/pred - (1 - target)/(1 - pred))
    tmp1 = 1 - self.target.array
    tmp2 = 1 - self.pred.array
    tmp2 += self.eps
    ca.divide(tmp1, tmp2, tmp1)
    ca.add(self.pred.array, self.eps, tmp2)
    ca.divide(self.target.array, tmp2, out=tmp2)
    ca.subtract(tmp1, tmp2, self.pred.grad_array)
    self.pred.grad_array *= ca.reshape(self.grad_array, self.bcast_shape)
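The expression being computed is the derivative of the binary cross-entropy L = -(t log p + (1 - t) log(1 - p)) with respect to p; a quick NumPy check against a central-difference estimate (values are illustrative):

import numpy as np

t = np.array([0.0, 1.0, 1.0, 0.0])
p = np.array([0.1, 0.8, 0.3, 0.6])
eps = 1e-7

# Same form as the bprop above: (1 - t)/(1 - p) - t/p, with eps for stability
grad = (1 - t) / (1 - p + eps) - t / (p + eps)

# Central-difference estimate of dL/dp
h = 1e-5
loss = lambda q: -(t * np.log(q) + (1 - t) * np.log(1 - q))
numeric = (loss(p + h) - loss(p - h)) / (2 * h)
print(np.allclose(grad, numeric, atol=1e-4))  # True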
def fprop(self):
    if self.phase == 'train':
        # Calculate batch mean
        tmp = ca.mean(ca.mean(self.x.array, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        # Center input
        ca.subtract(self.x.array, tmp, self._tmp_batch_centered)
        # Update running mean
        tmp *= 1 - self.momentum
        self.running_mean *= self.momentum
        self.running_mean += tmp
        # Calculate batch variance
        ca.power(self._tmp_batch_centered, 2, self.array)
        ca.mean(ca.mean(self.array, axis=0, keepdims=True),
                axis=(2, 3), keepdims=True, out=self._tmp_batch_inv_std)
        # Calculate 1 / sqrt(E[(x - E(x))^2] + eps)
        self._tmp_batch_inv_std += self.eps
        ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
        ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
        # Normalize input
        ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                    self.array)
        # Update running std (stored as the inverse std)
        self.running_std *= self.momentum
        ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
        self.running_std += tmp
        if self.noise_std > 0.0:
            noise = ca.random.normal(scale=self.noise_std, size=self.shape)
            ca.add(self.array, noise, self.array)
    elif self.phase == 'test':
        ca.subtract(self.x.array, self.running_mean, self.array)
        self.array *= self.running_std
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
    if self.affine:
        self.array *= self.gamma.array
        self.array += self.beta.array
def matrix_factorization(R, P, Q, mask, steps=200000000, alpha=0.00005,
                         beta=0.02):
    Q = ca.transpose(Q)
    for step in range(steps):
        # Error on the observed entries only
        E = ca.subtract(R, ca.multiply(ca.dot(P, Q), mask))
        rmse = ca.sqrt(ca.sum(ca.power(E, 2)) / ca.sum(mask))
        rmse = np.array(rmse)[0]
        print('step: %i RMSE: %f' % (step, rmse))
        if rmse < 0.65:
            break
        # Gradient step with L2 weight decay (strength beta)
        P = ca.add(ca.multiply(P, (1 - alpha * beta)),
                   ca.multiply(ca.dot(E, ca.transpose(Q)), 2 * alpha))
        Q = ca.add(ca.multiply(Q, (1 - alpha * beta)),
                   ca.multiply(ca.dot(ca.transpose(P), E), 2 * alpha))
    return P, Q
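A hedged usage sketch (shapes and initialization are illustrative, and assume cudarray is imported as ca as in the snippets here): R is the m x n rating matrix with unobserved entries zeroed out, mask marks the observed entries, and P (m x k) and Q (n x k) are the factors; note that Q is transposed inside the function and returned in that transposed (k x n) form.

import numpy as np
import cudarray as ca

m, n, k = 100, 80, 10
R_np = np.random.uniform(1, 5, size=(m, n))
mask_np = (np.random.uniform(size=(m, n)) < 0.2).astype(float)  # ~20% observed
R = ca.array(R_np * mask_np)
mask = ca.array(mask_np)
P = ca.array(np.random.normal(scale=0.1, size=(m, k)))
Q = ca.array(np.random.normal(scale=0.1, size=(n, k)))

P, Qt = matrix_factorization(R, P, Q, mask, steps=1000)
R_hat = ca.dot(P, Qt)  # reconstructed ratings (Qt is k x n)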
def test_binary():
    a_np = np.random.normal(size=(5, 5))
    b_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)

    c_np = np.add(a_np, b_np)
    c_ca = ca.add(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    np.add(a_np, b_np, a_np)
    ca.add(a_ca, b_ca, a_ca)
    print(np.allclose(a_np, np.array(a_ca)))

    np.multiply(a_np, b_np, a_np)
    ca.multiply(a_ca, b_ca, a_ca)
    print(np.allclose(a_np, np.array(a_ca)))

    a_np = np.random.normal(size=(5, 5))
    b_np = np.random.normal(size=(5, 5)) > 0
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)

    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.random.normal()
    b_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)

    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.divide(a_np, b_np)
    c_ca = ca.divide(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.subtract(a_np, b_np)
    c_ca = ca.subtract(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))
def fprop(self, x):
    n_channels = x.shape[1]
    # Calculate local mean
    tmp = self.conv_op.fprop(x, self.ca_kernel)
    if n_channels > 1:
        ca.divide(tmp, n_channels, tmp)
    # Center input with local mean
    centered = ca.subtract(x, tmp)
    # Calculate local standard deviation
    tmp = ca.power(centered, 2)
    tmp = self.conv_op.fprop(tmp, self.ca_kernel)
    if n_channels > 1:
        ca.divide(tmp, n_channels, tmp)
    ca.sqrt(tmp, tmp)
    # Scale centered input with standard deviation
    return centered / (tmp + self.eps)
def fprop(self, x, phase):
    n_channels = x.shape[1]
    # Calculate local mean
    tmp = self.conv_op.fprop(x, self.ca_kernel)
    if n_channels > 1:
        ca.divide(tmp, n_channels, tmp)
    # Center input with local mean
    centered = ca.subtract(x, tmp)
    # Calculate local standard deviation
    tmp = ca.power(centered, 2)
    tmp = self.conv_op.fprop(tmp, self.ca_kernel)
    if n_channels > 1:
        ca.divide(tmp, n_channels, tmp)
    ca.sqrt(tmp, tmp)
    # Scale centered input with standard deviation
    return centered / (tmp + self.eps)
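A rough NumPy/SciPy sketch of the same local contrast normalization idea, substituting a simple box filter for the convolution kernel used above (the kernel choice and the function name are assumptions, not the original implementation):

import numpy as np
from scipy.ndimage import uniform_filter

def local_contrast_normalize(img, size=9, eps=1e-4):
    # Box-filter stand-in for the convolutional local mean
    mean = uniform_filter(img, size=size)
    centered = img - mean
    # Local standard deviation from the local mean of squared deviations
    std = np.sqrt(uniform_filter(centered ** 2, size=size))
    return centered / (std + eps)

img = np.random.normal(size=(64, 64))
print(local_contrast_normalize(img).shape)  # (64, 64)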
def bprop(self):
    ca.subtract(self.pred.array, self.target.array, self.pred.grad_array)
    self.pred.grad_array *= 2
    self.pred.grad_array *= ca.reshape(self.grad_array, self.bcast_shape)
def bprop(self):
    ca.subtract(self.pred.array, self.target.array, self.pred.grad_array)
    self.pred.grad_array *= ca.reshape(self.grad_array, self.bcast_shape)
def fprop(self):
    ca.subtract(self.lhs.out, self.rhs.out, out=self.out)
def bprop(self):
    ca.subtract(self.pred.array, self.target.array, self.pred.grad_array)
    if self.sigma != 1.0:
        self.pred.grad_array *= 2 * self.multiplier
    self.pred.grad_array *= ca.reshape(self.grad_array, self.bcast_shape)
def fprop(self):
    ca.subtract(self.lhs.array, self.rhs.array, out=self.array)
def bprop(self):
    ca.subtract(self.pred.out, self.target.out, self.pred.out_grad)
    self.pred.out_grad *= 2 * self.multiplier
    self.pred.out_grad *= self.out_grad
def bprop(self):
    # y_i * (y_grad_i - sum(y_grad * y))
    ca.multiply(self.array, self.grad_array, self.x.grad_array)
    tmp1 = ca.sum(self.x.grad_array, axis=1, keepdims=True)
    ca.subtract(self.grad_array, tmp1, self.x.grad_array)
    self.x.grad_array *= self.array
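This is the softmax Jacobian-vector product, dL/dx_i = y_i * (g_i - sum_j g_j * y_j), applied row by row. A small NumPy check against the explicit Jacobian (helper name is illustrative):

import numpy as np

def softmax_bprop_rows(y, grad):
    # y: softmax output, grad: gradient w.r.t. y; returns gradient w.r.t. x
    return y * (grad - np.sum(grad * y, axis=1, keepdims=True))

y = np.array([[0.2, 0.5, 0.3]])
grad = np.array([[1.0, -2.0, 0.5]])
# Explicit Jacobian for one row: J = diag(y) - outer(y, y) (symmetric)
J = np.diag(y[0]) - np.outer(y[0], y[0])
print(np.allclose(softmax_bprop_rows(y, grad)[0], J @ grad[0]))  # True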
def bprop(self):
    ca.subtract(self.pred.out, self.target.out, self.pred.out_grad)
    self.pred.out_grad *= 2
    self.pred.out_grad *= self.out_grad