def test_complex_graph(self):
    # easier debugging: start with small dimensions
    x = Variable((5, 5))
    cx = conv_nofft(
        np.array([[1, 1, 1]]) / 3,
        conv_nofft(np.array([[1], [1], [1]]) / 3, x))
    scx = subsample(cx, (2, 2))
    ed = scx - np.reshape(np.arange(3 * 3), (3, 3))
    w = Variable(x.shape + (2, ))
    gw = grad(w, 2)
    Ew = gw + transpose(gw, (0, 1, 3, 2))
    gx = grad(x, 2)
    tgx = pxwise_matrixmult(
        np.reshape(np.arange(5 * 5 * 2 * 2), (5, 5, 2, 2)), gx)
    e1 = tgx - w

    inshape = (5 * 5 + 5 * 5 * 2, )
    outshape = (3 * 3 + 5 * 5 * 2 * 2 + 5 * 5 * 2, )
    self._generic_check_adjoint(lambda x: (ed, e1, Ew), inshape, outshape,
                                "complex", eps=5e-4)

    # continue with larger, randomized inputs
    K1 = np.abs(random.rand(1, 5, 1))
    K2 = np.abs(random.rand(5, 1, 1))
    x = Variable((320, 240, 2))
    cx = conv_nofft(K1, conv_nofft(K2, x))
    scx = subsample(cx, (5, 5, 1))
    ed = scx - random.rand(64, 48, 2)
    w = Variable(x.shape + (2, ))
    gw = grad(w, 2)
    Ew = gw + transpose(gw, (0, 1, 2, 4, 3))
    gx = grad(x, 2)
    tgx = pxwise_matrixmult(random.rand(320, 240, 2, 2, 2), gx)
    e1 = tgx - w

    inshape = (320 * 240 * 2 + 320 * 240 * 2 * 2, )
    outshape = (64 * 48 * 2 + 320 * 240 * 2 * 2 * 2 + 320 * 240 * 2 * 2, )
    self._generic_check_adjoint(lambda x: (ed, e1, Ew), inshape, outshape,
                                "complex2", eps=5e-4)
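
# Aside (illustrative sketch, not part of the original suite): the nested
# conv_nofft calls above build a separable 3x3 box blur out of a 1x3 and a
# 3x1 kernel. The identity they rely on, shown with scipy.ndimage under the
# assumption that scipy is available in the test environment:
def _separable_box_blur_sketch():
    from scipy.ndimage import convolve
    img = np.random.rand(5, 5)
    box = np.ones((3, 3)) / 9.0                # full 3x3 box kernel
    row = np.array([[1.0, 1.0, 1.0]]) / 3.0    # 1x3 horizontal pass
    col = row.T                                # 3x1 vertical pass
    full = convolve(img, box)                  # single 2-D convolution
    sep = convolve(convolve(img, col), row)    # two 1-D passes
    assert np.allclose(full, sep)              # separability holds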
def test_group_norm1(self):
    random.seed(0)
    x = Variable((10, 10, 2, 3))
    f = group_norm1(x, [2, 3])
    v = np.reshape(np.arange(10 * 10 * 2 * 3),
                   (10, 10, 2, 3)).astype(np.float32)
    xhat1 = f.prox(1, v.copy())
    xhat2 = f.prox_cuda(1, v.copy()).get()
    if not np.all(np.abs(xhat1 - xhat2) < 1e-4):
        logging.error(f.cuda_code)
        logging.error("failed: %f" % np.amax(np.abs(xhat1 - xhat2)))
    self.assertTrue(np.all(np.abs(xhat1 - xhat2) < 1e-4))

    eps = 1e-5
    maxeps = 0
    # compare the CPU and CUDA prox on random inputs
    for i in range(50):
        v = random.rand(10, 10, 2, 3).astype(np.float32)
        rho = np.abs(random.rand(1))
        xhat1 = f.prox(rho, v.copy())
        xhat2 = f.prox_cuda(rho, v.copy()).get()
        err = np.amax(np.abs(xhat1 - xhat2))
        if not err < eps:
            logging.error(f.cuda_code)
            logging.error("failed: %f" % err)
        self.assertTrue(err < eps)
        maxeps = max(err, maxeps)

    # same comparison with randomized prox parameters; both implementations
    # must receive the same parameters for the comparison to be meaningful
    for i in range(50):
        v = random.rand(10, 10, 2, 3).astype(np.float32)
        rho = np.abs(random.rand(1))
        alpha = np.abs(random.rand(1))
        beta = np.abs(random.rand(1))
        gamma = np.abs(random.rand(1))
        c = np.abs(random.rand(*f.c.shape))
        b = np.abs(random.rand(*f.b.shape))
        kwargs = dict(alpha=alpha, beta=beta, gamma=gamma, c=c, b=b)
        xhat1 = f.prox(rho, v.copy(), **kwargs)
        xhat2 = f.prox_cuda(rho, v.copy(), **kwargs).get()
        err = np.amax(np.abs(xhat1 - xhat2))
        if not err < eps:
            logging.error(f.cuda_code)
            logging.error("failed: %f" % err)
        self.assertTrue(err < eps)
        maxeps = max(err, maxeps)

    logging.info("Max proxfn error: %.2e" % maxeps)
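
# Aside (illustrative sketch): assuming group_norm1(x, [2, 3]) is the group
# l2,1-norm that sums the Frobenius norms of the trailing 2x3 blocks, and
# assuming the prox(rho, v) = argmin_x f(x) + (rho/2)||x - v||^2 convention,
# the prox the test compares is block soft-thresholding, sketched here for a
# single block:
def _block_soft_threshold_sketch(v, rho):
    norm = np.linalg.norm(v)                   # Frobenius norm of the block
    if norm <= 1.0 / rho:
        return np.zeros_like(v)                # small blocks shrink to zero
    return v * (1.0 - 1.0 / (rho * norm))      # otherwise scale toward zero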
def test_performance(self):
    c = random.rand(2000, 2000)
    x = Variable([2000, 2000])
    K = np.abs(random.rand(9, 9))
    G = CompGraph(
        vstack([subsample((conv_nofft(K, x) - c) * 5, [2, 4]), x * 10]))

    xtest1 = random.rand(2000 * 2000).astype(np.float32)
    ytest1 = np.zeros(G.output_size, dtype=np.float32)

    # time 10 forward evaluations on the CPU
    t1_cpu = time.time()
    for i in range(10):
        ytest1 = G.forward(xtest1, ytest1)
    t2_cpu = time.time()

    xtest = gpuarray.to_gpu(xtest1.astype(np.float32))
    ytest = gpuarray.to_gpu(ytest1.astype(np.float32))

    # time 10 forward evaluations on the GPU
    t1_gpu = time.time()
    for i in range(10):
        ytest = G.forward_cuda(xtest, ytest)
    t2_gpu = time.time()

    t_cpu = t2_cpu - t1_cpu
    t_gpu = t2_gpu - t1_gpu
    # times are raw time.time() differences, i.e. seconds
    logging.info("Forward timing: cpu=%.2f s gpu=%.2f s factor=%.3f" %
                 (t_cpu, t_gpu, t_gpu / t_cpu))
    self.assertTrue(t_gpu < t_cpu)

    # same measurement for the adjoint
    t1_cpu = time.time()
    for i in range(10):
        xtest1 = G.adjoint(ytest1, xtest1)
    t2_cpu = time.time()

    t1_gpu = time.time()
    for i in range(10):
        xtest = G.adjoint_cuda(ytest, xtest)
    t2_gpu = time.time()

    t_cpu = t2_cpu - t1_cpu
    t_gpu = t2_gpu - t1_gpu
    logging.info("Adjoint timing: cpu=%.2f s gpu=%.2f s factor=%.3f" %
                 (t_cpu, t_gpu, t_gpu / t_cpu))
    self.assertTrue(t_gpu < t_cpu)
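
# Aside (illustrative sketch, assuming pycuda is the CUDA backend): kernel
# launches are asynchronous, so wall-clock timing around forward_cuda could
# in principle under-measure GPU work still in flight. A synchronized timing
# helper would bracket the loop like this:
def _timed_gpu_calls(fn, n, *args):
    import pycuda.driver as cuda
    cuda.Context.synchronize()    # drain pending work before starting
    t0 = time.time()
    for _ in range(n):
        result = fn(*args)
    cuda.Context.synchronize()    # wait until all launched kernels finish
    return result, time.time() - t0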
def _generic_check_adjoint(self,
                           f,
                           inshape,
                           outshape,
                           s,
                           ntests=50,
                           eps=1e-5,
                           verbose=False,
                           in_out_sample=None):
    """Generic adjoint-consistency test for a parametrizable function f."""
    x = Variable(inshape)
    func = f(x)
    if not isinstance(func, tuple):
        func = (func, )
    G = CompGraph(vstack(func))

    nin = functools.reduce(lambda x, y: x * y, inshape, 1)
    nout = functools.reduce(lambda x, y: x * y, outshape, 1)

    if in_out_sample is not None:
        # check against the given input/output samples
        x1 = in_out_sample[0]   # forward in
        y1s = in_out_sample[1]  # forward out
        y2 = in_out_sample[2]   # adjoint in
        x2s = in_out_sample[3]  # adjoint out

        y1a = G.forward_cuda(
            gpuarray.to_gpu(x1.astype(np.float32)),
            gpuarray.to_gpu(y1s.astype(np.float32))).get()
        self.assertTrue(np.amax(np.abs(y1a - y1s)) < eps)

        x2a = G.adjoint_cuda(
            gpuarray.to_gpu(y2.astype(np.float32)),
            gpuarray.to_gpu(x2s.astype(np.float32))).get()
        self.assertTrue(np.amax(np.abs(x2a - x2s)) < eps)

    # test with random data that the forward/adjoint operators are
    # consistent, i.e. <G x1, y2> == <x1, G^T y2>
    maxerr = 0.0
    random.seed(0)  # make tests reproducible
    for tidx in range(ntests):
        x1 = random.rand(nin).astype(np.float32)
        y2 = random.rand(nout).astype(np.float32)
        y1 = np.zeros(nout, dtype=np.float32)
        x2 = np.zeros(nin, dtype=np.float32)

        if verbose:
            print("forward: ", end="")
        y1 = G.forward_cuda(
            gpuarray.to_gpu(x1), gpuarray.to_gpu(y1), printt=verbose).get()
        if verbose:
            print("adjoint: ", end="")
        x2 = G.adjoint_cuda(
            gpuarray.to_gpu(y2), gpuarray.to_gpu(x2), printt=verbose).get()
        self.assertTrue(not np.all(y1 == 0) and not np.all(x2 == 0))

        # reference results from the non-CUDA implementation
        y1o = G.forward(x1, y1.copy())
        x2o = G.adjoint(y2, x2.copy())

        erro = abs(
            np.dot(x1.flatten().astype(np.float64),
                   x2o.flatten().astype(np.float64)) -
            np.dot(y1o.flatten().astype(np.float64),
                   y2.flatten().astype(np.float64)))
        err = abs(
            np.dot(x1.flatten().astype(np.float64),
                   x2.flatten().astype(np.float64)) -
            np.dot(y1.flatten().astype(np.float64),
                   y2.flatten().astype(np.float64)))
        if err > maxerr:
            maxerr = err

        if verbose and err > eps:
            print("forward CUDA code:")
            print(G.cuda_forward_subgraphs.cuda_code)
            print("backward CUDA code:")
            print(G.cuda_adjoint_subgraphs.cuda_code)
            print("x1\n", np.reshape(x1, inshape))
            print("y1\n", np.reshape(y1, outshape))
            print("y1o\n", np.reshape(y1o, outshape))
            print("y2\n", np.reshape(y2, outshape))
            print("x2\n", np.reshape(x2, inshape))
            print("x2o\n", np.reshape(x2o, inshape))
            print("(%d) Adjoint test (%s): gpu: %f, nogpu: %f" %
                  (tidx, s, err, erro))
            print("max(abs(y1-y1o)): %f" % np.amax(np.abs(y1 - y1o)))
            print("max(abs(x2-x2o)): %f" % np.amax(np.abs(x2 - x2o)))
        self.assertTrue(err <= eps)

    if verbose:
        print("%s passed %d tests. Max adjoint test error: %f" %
              (s, ntests, maxerr))
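
# Aside (illustrative sketch): the dot-product check above instantiates the
# adjoint identity <G x, y> == <x, G^T y> for a linear operator G. The same
# identity with an explicit matrix, using nothing but numpy:
def _adjoint_identity_sketch():
    A = np.random.rand(7, 5)              # explicit linear operator
    x = np.random.rand(5)
    y = np.random.rand(7)
    lhs = np.dot(A.dot(x), y)             # <A x, y>
    rhs = np.dot(x, A.T.dot(y))           # <x, A^T y>
    assert abs(lhs - rhs) < 1e-10         # holds up to floating-point error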