def _fwd_grad(self, wrt, valuation, cache):
    q = pl.qs[0]
    lhs = cache[id(self.ops[0])]
    rhs = cache[id(self.ops[1])]
    a = linalg.dot(q, self.ops[0]._fwd_grad(wrt, valuation, cache), rhs)
    b = linalg.dot(q, lhs, self.ops[1]._fwd_grad(wrt, valuation, cache))
    return a + b
def _rev_grad(self, valuation, adjoint, gradient, cache):
    q = pl.qs[0]
    lhs = cache[id(self.ops[0])]
    rhs = cache[id(self.ops[1])]
    adj1 = linalg.dot(q, adjoint, rhs, transB=not self.transB)
    adj2 = linalg.dot(q, lhs, adjoint, transA=not self.transA)
    self.ops[0]._rev_grad(valuation, adj1, gradient, cache)
    self.ops[1]._rev_grad(valuation, adj2, gradient, cache)
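# The two methods above apply the product rule for matrix multiplication:
# forward mode propagates d(A.B) = dA.B + A.dB, and reverse mode pulls the
# output adjoint G back as gA = G.B^T and gB = A^T.G. A minimal standalone
# NumPy sketch of these identities (not using the library's linalg.dot;
# names here are illustrative only):
import numpy as np

rng = np.random.RandomState(0)
A = rng.rand(3, 4).astype(np.float32)
B = rng.rand(4, 2).astype(np.float32)
dA = rng.rand(3, 4).astype(np.float32)   # forward-mode tangents of A and B
dB = rng.rand(4, 2).astype(np.float32)
G = rng.rand(3, 2).astype(np.float32)    # reverse-mode adjoint of A.dot(B)

dY = dA.dot(B) + A.dot(dB)               # forward mode: tangent of the product
gA = G.dot(B.T)                          # reverse mode: adjoints of the inputs
gB = A.T.dot(G)

# <dY, G> must equal <dA, gA> + <dB, gB> (the duality between the two modes)
assert np.allclose((dY * G).sum(), (dA * gA).sum() + (dB * gB).sum())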
def _evaluate(self, valuation, cache):
    q = pl.qs[0]
    if id(self) not in cache:
        X = self.ops[0]._evaluate(valuation, cache)
        W = self.ops[1]._evaluate(valuation, cache)
        b = self.ops[2]._evaluate(valuation, cache)
        out_c, _, kh, kw = W.shape
        n, c, h, w = X.shape
        out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                      cover_all=self.cover_all)
        out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                      cover_all=self.cover_all)
        y = clarray.empty(q, (n, out_c, out_h, out_w), dtype=X.dtype)
        self.col, ev1 = conv.im2col(q, X, kh, kw, self.sy, self.sx,
                                    self.ph, self.pw, self.cover_all)
        W_mat = W.reshape(out_c, -1)
        ev1.wait()  # TODO asynchronize
        col_mats = self.col.reshape(n, -1, out_h * out_w)
        y_mats = y.reshape(n, out_c, -1)
        for i in xrange(n):
            y_mats[i] = linalg.dot(q, W_mat, col_mats[i])
        if b is not None:
            # y += b[:, None, None]
            _, ev3 = conv.bcast_add(q, y, b, y)
            ev3.wait()  # TODO asynchronize
        cache[id(self)] = y
    return cache[id(self)]
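# The forward pass above lowers the convolution to im2col followed by one GEMM
# per sample. For reference, a minimal NumPy sketch of the same result computed
# directly (naive loops, stride and zero padding only, no cover_all; the
# function name is illustrative only):
import numpy as np

def conv2d_reference(X, W, b=None, sy=1, sx=1, ph=0, pw=0):
    n, c, h, w = X.shape
    out_c, _, kh, kw = W.shape
    out_h = (h + 2 * ph - kh) // sy + 1
    out_w = (w + 2 * pw - kw) // sx + 1
    Xp = np.pad(X, ((0, 0), (0, 0), (ph, ph), (pw, pw)), mode='constant')
    y = np.zeros((n, out_c, out_h, out_w), dtype=X.dtype)
    for i in range(out_h):
        for j in range(out_w):
            patch = Xp[:, :, i * sy:i * sy + kh, j * sx:j * sx + kw]
            # contract over (c, kh, kw) for every sample and output channel
            y[:, :, i, j] = np.tensordot(patch, W, axes=([1, 2, 3], [1, 2, 3]))
    if b is not None:
        y += b.reshape(1, -1, 1, 1)
    return y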
def _evaluate(self, valuation, cache):
    q = pl.qs[0]
    if id(self) not in cache:
        e1, e2 = self.ops[0]._evaluate, self.ops[1]._evaluate
        cache[id(self)] = linalg.dot(q, e1(valuation, cache),
                                     e2(valuation, cache))
    return cache[id(self)]
def _evaluate(self, valuation, cache):
    if id(self) not in cache:
        q = pl.qs[0]
        o1 = self.ops[0]._evaluate(valuation, cache)
        o2 = self.ops[1]._evaluate(valuation, cache)
        self.diff = o1 - o2
        self.diffr = self.diff.ravel()
        dop = linalg.dot(q, self.diffr, self.diffr)
        cache[id(self)] = dop / (2.0 * self.diff.size)
    return cache[id(self)]
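# The cost above is half the mean squared error of the difference between the
# two operands: the inner product of the raveled difference with itself,
# divided by twice its size. A minimal NumPy equivalent of the cached value
# (hedged sketch; host arrays o1 and o2 of identical shape assumed):
import numpy as np

o1 = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
o2 = np.array([[1.5, 1.5], [3.0, 5.0]], dtype=np.float32)
diff = (o1 - o2).ravel()
cost = np.dot(diff, diff) / (2.0 * diff.size)   # == np.mean(diff ** 2) / 2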
def test_dot(self):
    q = clplatf.qs[0]
    X = np.random.uniform(0, 1, (50000,)).astype(np.float32)
    Y = np.random.uniform(0, 1, (50000,)).astype(np.float32)
    expected = np.dot(X, Y)
    gX = clarray.to_device(q, X)
    gY = clarray.to_device(q, Y)
    gR = linalg.dot(q, gX, gY)
    R = gR.get()
    self.assertTrue(np.allclose(R, expected))
    A = np.random.uniform(0, 1, (512, 512)).astype(np.float32)
    B = np.random.uniform(0, 1, (512, 512)).astype(np.float32)
    expected = np.dot(A, B)
    gA = clarray.to_device(q, A)
    gB = clarray.to_device(q, B)
    gC = linalg.dot(q, gA, gB)
    C = gC.get()
    self.assertTrue(np.allclose(C, expected))
def _rev_grad(self, valuation, adjoint, gradient, cache):
    q = pl.qs[0]
    X = cache[id(self.ops[0])]
    W = cache[id(self.ops[1])]
    b = cache[id(self.ops[2])]
    gy = adjoint
    _, out_c, out_h, out_w = gy.shape
    n, c, h, w = X.shape
    kh, kw = W.shape[2:]
    gW = clarray.zeros_like(W)
    gW_mat = gW.reshape(out_c, c * kh * kw)
    col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
    gy_mats = gy.reshape(n, out_c, out_h * out_w)
    for i in xrange(n):
        gwmat = linalg.dot(q, gy_mats[i], col_mats[i], transB=True)
        gW_mat += gwmat
    W_mat = W.reshape(out_c, -1)
    gcol = clarray.empty_like(self.col)
    gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)
    for i in xrange(n):
        gcol_mats[i] = linalg.dot(q, W_mat, gy_mats[i], transA=True)
    gx, ev = conv.col2im(q, gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    ev.wait()
    gb = None
    if b is not None:
        gb, ev = conv.bgrads_sum(q, gy)
        ev.wait()
    # TODO bias... sum along multiple axes of gy?
    # TODO set gW, gx and gb in gradient dict
    self.ops[0]._rev_grad(valuation, gx, gradient, cache)
    self.ops[1]._rev_grad(valuation, gW, gradient, cache)
    if gb is not None:
        self.ops[2]._rev_grad(valuation, gb, gradient, cache)
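# In the backward pass above, the weight gradient accumulates one GEMM per
# sample (gy_mat . col_mat^T) and the bias gradient sums gy over every axis
# except the output-channel axis. A minimal NumPy sketch of those reductions
# (hedged; col is assumed to already hold the (n, c*kh*kw, out_h*out_w)
# im2col matrices, and the function name is illustrative only):
import numpy as np

def conv_backward_reference(gy, col, out_c, c, kh, kw):
    n, _, out_h, out_w = gy.shape
    gy_mats = gy.reshape(n, out_c, out_h * out_w)
    col_mats = col.reshape(n, c * kh * kw, out_h * out_w)
    # sum_i gy_mats[i].dot(col_mats[i].T), written as a single einsum
    gW = np.einsum('nij,nkj->ik', gy_mats, col_mats).reshape(out_c, c, kh, kw)
    gb = gy.sum(axis=(0, 2, 3))   # bias gradient: sum over n, out_h, out_w
    return gW, gb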
def test_dot_offending_vectors(self):
    q = clplatf.qs[0]
    X = np.loadtxt(open('test/gymat.txt', 'r'), delimiter=',').astype(np.float32)
    Y = np.loadtxt(open('test/colmat.txt', 'r'), delimiter=',').astype(np.float32)
    gX = clarray.to_device(q, X)
    gY = clarray.to_device(q, Y)
    expected = X.dot(Y.T)
    gR = linalg.dot(q, gX, gY, transB=True)
    R = gR.get()
    print >> sys.stderr, '\nReal:\n', R
    print >> sys.stderr, 'expected:\n', expected
    print >> sys.stderr, 'shapes: r:', R.shape, 'e:', expected.shape
    print >> sys.stderr, 'mean diff:', np.mean(R - expected)
    self.assertTrue(np.allclose(R, expected))
def test_dot_again(self):
    q = clplatf.qs[0]
    X = np.random.uniform(0, 1, (128, 64, 1024)).astype(np.float32)
    Y = np.random.uniform(0, 1, (128, 27, 1024)).astype(np.float32)
    gX = clarray.to_device(q, X)
    gY = clarray.to_device(q, Y)
    for i in range(128):
        expected = X[i].dot(Y[i].T)
        gR = linalg.dot(q, gX[i], gY[i], transB=True)
        R = gR.get()
        if not np.allclose(R, expected):
            print >> sys.stderr, '\nReal:\n', R
            print >> sys.stderr, 'expected:\n', expected
            print >> sys.stderr, 'shapes: r:', R.shape, 'e:', expected.shape
            print >> sys.stderr, 'mean diff:', np.mean(R - expected)
            break
    self.assertTrue(np.allclose(R, expected))
def train(self, X, Y, learning_rate=0.01):
    val = pl.valuation()
    val['X'] = X
    val['Y'] = Y
    for name, value in self.params:
        val[name] = value
    grad = self.cost.rev_grad(val)
    debatch_help_vector = clarray.zeros(pl.qs[0], (Y.shape[0], 1),
                                        dtype=np.float32) + 1
    for name, value in self.params:
        if name.startswith('b'):
            dbh = linalg.dot(pl.qs[0], grad[name], debatch_help_vector,
                             transA=True)
            value -= learning_rate * dbh.ravel()
        else:
            value -= learning_rate * grad[name]
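# The "debatch" step above reduces a per-sample bias gradient of shape
# (batch, bias_dim) to a single update: multiplying by a column of ones with
# transA=True is the same as summing over the batch axis. A minimal NumPy
# sketch of that identity (hedged; names here are illustrative only):
import numpy as np

rng = np.random.RandomState(0)
grad_b = rng.rand(8, 5).astype(np.float32)          # per-sample bias gradients
ones = np.ones((grad_b.shape[0], 1), dtype=np.float32)
debatched = grad_b.T.dot(ones).ravel()              # grad_b^T . ones
assert np.allclose(debatched, grad_b.sum(axis=0))   # equals the batch sum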