def update(self, learning_rate, stream=None):
    """Apply one descent step to the layer parameters using the stored gradients.

    The weights ``self.W`` are moved along ``self.dW`` and the bias
    ``self.b`` along ``self.db``, both scaled by ``-learning_rate``.
    When ``self.l1_penalty`` is positive the weight step is delegated to
    ``op.add_vec_l1reg`` (presumably an L1-regularised update; confirm
    against the ``op`` module), otherwise a plain ``op.add_vec`` is used.

    Args:
        learning_rate: Step size; it is negated before being handed to ``op``.
        stream: Forwarded unchanged to every ``op`` call (presumably a GPU
            stream handle; ``None`` leaves the choice to ``op``).
    """
    step = -learning_rate

    if self.l1_penalty > 0:
        # Result is written back into W via the explicit ``out`` argument.
        op.add_vec_l1reg(
            self.W, self.dW, step, self.l1_penalty, out=self.W, stream=stream
        )
    else:
        op.add_vec(self.W, step, self.dW, stream=stream)

    # The bias never takes the L1 path.
    op.add_vec(self.b, step, self.db, stream=stream)
def bprop(self, delta, momentum=0.0):
    """Accumulate this layer's gradients and return the signal for the layer below.

    ``self.dW`` and ``self.db`` are updated with ``momentum`` passed as the
    ``beta`` factor of the accumulating ``op`` calls, so the previous
    gradient contents are scaled by ``momentum`` before the new
    contribution is added.

    Args:
        delta: Incoming backpropagated signal. It is handed to
            ``self.dfunc`` first (presumably modified in place by the
            activation derivative; confirm against ``dfunc``).
        momentum: Scale applied to the previous contents of ``dW``/``db``.

    Returns:
        ``0.0`` for an input layer. Otherwise ``op.dot(delta, self.W)``,
        multiplied by ``self.M`` when dropout is active and the activation
        is neither ``"relu"`` nor ``"sigmoid"``.
    """
    # make sure layer above is done
    op.streams[2].synchronize()

    weight_stream = op.streams[0]
    self.dfunc(delta, self.A, self.Z, stream=weight_stream)
    # delta must be final before both gradient computations read it.
    weight_stream.synchronize()

    # Weight gradient: averaged over the leading dimension of delta.
    inv_batch = 1.0 / delta.shape[0]
    op.add_dot(
        delta,
        self.X,
        self.dW,
        True,
        False,
        alpha=inv_batch,
        beta=momentum,
        stream=weight_stream,
    )

    # Bias gradient is issued on a separate stream from the weight gradient.
    bias_stream = op.streams[1]
    delta_mean = op.mean(delta, axis=0, stream=bias_stream)
    op.add_vec(self.db, 1.0, delta_mean, beta=momentum, stream=bias_stream)

    if self.l2_penalty > 0:
        # Add the L2 term (l2_penalty * W) on the same stream as dW.
        op.add_vec(self.dW, self.l2_penalty, self.W, stream=weight_stream)

    if self.is_input_layer:
        return 0.0

    needs_mask = self.dropout > 0.0 and self.activation not in ("relu", "sigmoid")
    below = op.dot(delta, self.W)
    return below * self.M if needs_mask else below
def test_add_vec():
    """Check ``op.add_vec`` against a NumPy reference on host and device arrays.

    Two call forms are exercised: ``add_vec(x, alpha, y)``, expected to
    leave ``x + alpha * y`` in ``x``, and ``add_vec(x, alpha, y, beta)``,
    expected to leave ``beta * x + alpha * y`` in ``x``. Each form is run
    once on NumPy arrays and once on arrays moved through ``op.to_gpu``.
    """
    tolerance = 1e-4
    scale = 2.5

    target = 5.0 * np.random.randn(10).astype(np.float32)
    addend = 10.0 * np.random.randn(10).astype(np.float32)
    # The host call overwrites ``target``; keep an untouched copy for reuse.
    pristine = target.copy()

    # --- without beta: x <- x + alpha * y ---
    expected = target + scale * addend

    op.add_vec(target, scale, addend)
    assert_allclose(expected, target, err_msg="CPU", rtol=tolerance)

    target_dev = op.to_gpu(pristine)
    addend_dev = op.to_gpu(addend)
    op.add_vec(target_dev, scale, addend_dev)
    assert_allclose(expected, op.to_cpu(target_dev), err_msg="GPU", rtol=tolerance)

    # --- with beta: x <- beta * x + alpha * y ---
    decay = 0.5
    target = pristine.copy()
    expected = decay * target + scale * addend

    op.add_vec(target, scale, addend, decay)
    assert_allclose(expected, target, err_msg="CPU", rtol=tolerance)

    target_dev = op.to_gpu(pristine)
    addend_dev = op.to_gpu(addend)
    op.add_vec(target_dev, scale, addend_dev, decay)
    assert_allclose(expected, op.to_cpu(target_dev), err_msg="GPU", rtol=tolerance)
def test_add_vec():
    """Verify ``op.add_vec`` on CPU and GPU inputs, with and without ``beta``.

    The reference result is computed with NumPy before each call:
    ``x + alpha * y`` when no ``beta`` is passed and
    ``beta * x + alpha * y`` when it is. The operation is expected to
    store its result in its first argument.
    """
    rtol = 1e-4
    alpha = 2.5

    x_orig = 5.0 * np.random.randn(10).astype(np.float32)
    y = 10.0 * np.random.randn(10).astype(np.float32)

    # First pass omits beta entirely; second pass passes it positionally.
    for extra_args in ((), (0.5,)):
        # Fresh host copy each pass because the CPU call overwrites it.
        x = x_orig.copy()
        if extra_args:
            z = extra_args[0] * x + alpha * y
        else:
            z = x + alpha * y

        op.add_vec(x, alpha, y, *extra_args)
        assert_allclose(z, x, err_msg="CPU", rtol=rtol)

        xd = op.to_gpu(x_orig)
        yd = op.to_gpu(y)
        op.add_vec(xd, alpha, yd, *extra_args)
        res = op.to_cpu(xd)
        assert_allclose(z, res, err_msg="GPU", rtol=rtol)
def bprop(self, delta, momentum=0.0):
    """Update the stored gradients from ``delta`` and return the downstream signal.

    Work is spread over the entries of ``op.streams``: index 0 carries the
    ``dfunc`` call and everything touching ``self.dW``, index 1 carries
    the ``self.db`` update, and index 2 carries the product returned to
    the layer below. The method first waits on index 2.

    Args:
        delta: Incoming backpropagated signal; passed through
            ``self.dfunc`` together with ``self.A`` and ``self.Z``
            (presumably updated in place; confirm against ``dfunc``).
        momentum: Passed as ``beta`` to the accumulating ops, i.e. the
            scale applied to the previous contents of ``dW``/``db``.

    Returns:
        ``0.0`` when ``self.is_input_layer`` is true, otherwise the result
        of ``op.dot(delta, self.W)`` issued on ``op.streams[2]``.
    """
    # make sure layer above is done
    op.streams[2].synchronize()

    grad_w_stream = op.streams[0]
    self.dfunc(delta, self.A, self.Z, stream=grad_w_stream)
    # Wait so later reads of delta on other streams see the finished result.
    grad_w_stream.synchronize()

    batch_scale = 1.0 / delta.shape[0]
    op.add_dot(
        delta,
        self.X,
        self.dW,
        True,
        False,
        alpha=batch_scale,
        beta=momentum,
        stream=grad_w_stream,
    )

    grad_b_stream = op.streams[1]
    column_mean = op.mean(delta, axis=0, stream=grad_b_stream)
    op.add_vec(self.db, 1.0, column_mean, beta=momentum, stream=grad_b_stream)

    if self.l2_penalty > 0:
        # L2 contribution is queued behind the dW computation on the same stream.
        op.add_vec(self.dW, self.l2_penalty, self.W, stream=grad_w_stream)

    if self.is_input_layer:
        # Nothing below to propagate to.
        return 0.0
    return op.dot(delta, self.W, stream=op.streams[2])