Example #1
    def test_complex_graph(self):
        # start with small dimensions for easier debugging
        x = Variable((5, 5))
        cx = conv_nofft(
            np.array([[1, 1, 1]]) / 3,
            conv_nofft(np.array([[1], [1], [1]]) / 3, x))
        scx = subsample(cx, (2, 2))
        ed = scx - np.reshape(np.arange(3 * 3), (3, 3))
        w = Variable(x.shape + (2, ))
        gw = grad(w, 2)
        Ew = gw + transpose(gw, (0, 1, 3, 2))
        gx = grad(x, 2)
        tgx = pxwise_matrixmult(
            np.reshape(np.arange(5 * 5 * 2 * 2), (5, 5, 2, 2)), gx)
        e1 = tgx - w
        inshape = (5 * 5 + 5 * 5 * 2, )
        outshape = (3 * 3 + 5 * 5 * 2 * 2 + 5 * 5 * 2, )
        self._generic_check_adjoint(lambda x: (ed, e1, Ew),
                                    inshape,
                                    outshape,
                                    "complex",
                                    eps=5e-4)

        # continue with a larger instance and random data
        K1 = np.abs(random.rand(1, 5, 1))
        K2 = np.abs(random.rand(5, 1, 1))

        x = Variable((320, 240, 2))
        cx = conv_nofft(K1, conv_nofft(K2, x))
        scx = subsample(cx, (5, 5, 1))
        ed = scx - random.rand(64, 48, 2)

        w = Variable(x.shape + (2, ))
        gw = grad(w, 2)
        Ew = gw + transpose(gw, (0, 1, 2, 4, 3))
        gx = grad(x, 2)
        tgx = pxwise_matrixmult(random.rand(320, 240, 2, 2, 2), gx)
        e1 = tgx - w

        inshape = (320 * 240 * 2 + 320 * 240 * 2 * 2, )
        outshape = (64 * 48 * 2 + 320 * 240 * 2 * 2 * 2 + 320 * 240 * 2 * 2, )
        self._generic_check_adjoint(lambda x: (ed, e1, Ew),
                                    inshape,
                                    outshape,
                                    "complex2",
                                    eps=5e-4)
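
The nested conv_nofft calls above exploit kernel separability: a horizontal 1x3 mean followed by a vertical 3x1 mean is the same linear operator as a single 3x3 box filter. A minimal SciPy sketch of that equivalence, assuming plain zero-padded convolution (conv_nofft's actual boundary handling may differ):

import numpy as np
from scipy.signal import convolve2d

x = np.random.rand(5, 5)
k_row = np.array([[1.0, 1.0, 1.0]]) / 3      # 1x3 horizontal mean
k_col = np.array([[1.0], [1.0], [1.0]]) / 3  # 3x1 vertical mean

# two 1-D passes are equivalent to one 3x3 box filter
sep = convolve2d(convolve2d(x, k_col, mode="same"), k_row, mode="same")
box = convolve2d(x, np.ones((3, 3)) / 9.0, mode="same")
assert np.allclose(sep, box)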
Example #2
    def test_group_norm1(self):
        random.seed(0)

        x = Variable((10, 10, 2, 3))
        f = group_norm1(x, [2, 3])

        v = np.reshape(np.arange(10 * 10 * 2 * 3),
                       (10, 10, 2, 3)).astype(np.float32)
        xhat1 = f.prox(1, v.copy())
        xhat2 = f.prox_cuda(1, v.copy()).get()

        if not np.all(np.abs(xhat1 - xhat2) < 1e-4):
            logging.error(f.cuda_code)
            logging.error("failed: %f" % np.amax(np.abs(xhat1 - xhat2)))
        self.assertTrue(np.all(np.abs(xhat1 - xhat2) < 1e-4))

        eps = 1e-5
        maxeps = 0
        for i in range(50):
            v = random.rand(10, 10, 2, 3).astype(np.float32)
            rho = np.abs(random.rand(1))
            xhat1 = f.prox(rho, v.copy())
            xhat2 = f.prox_cuda(rho, v.copy()).get()

            err = np.amax(np.abs(xhat1 - xhat2))
            if not err < eps:
                logging.error(f.cuda_code)
                logging.error("failed: %f" % np.amax(np.abs(xhat1 - xhat2)))
            self.assertTrue(err < eps)
            maxeps = max(err, maxeps)

        for i in range(50):
            v = random.rand(10, 10, 2, 3).astype(np.float32)
            rho = np.abs(random.rand(1))
            alpha = np.abs(random.rand(1))
            beta = np.abs(random.rand(1))
            gamma = np.abs(random.rand(1))
            c = np.abs(random.rand(*f.c.shape))
            b = np.abs(random.rand(*f.b.shape))

            xhat1 = f.prox(rho,
                           v.copy(),
                           alpha=alpha,
                           beta=beta,
                           gamma=gamma,
                           c=c,
                           b=b)
            # pass the same parameters to the CUDA prox so both paths are comparable
            xhat2 = f.prox_cuda(rho, v.copy(), alpha=alpha, beta=beta,
                                gamma=gamma, c=c, b=b).get()

            err = np.amax(np.abs(xhat1 - xhat2))
            if not err < eps:
                logging.error(f.cuda_code)
                logging.error("failed: %f" % np.amax(np.abs(xhat1 - xhat2)))
            self.assertTrue(err < eps)
            maxeps = max(err, maxeps)

        logging.info("Max proxfn error: %.2e" % maxeps)
Example #3
    def test_performance(self):
        c = random.rand(2000, 2000)
        x = Variable([2000, 2000])
        K = np.abs(random.rand(9, 9))
        G = CompGraph(
            vstack([subsample((conv_nofft(K, x) - c) * 5, [2, 4]), x * 10]))
        xtest1 = random.rand(2000 * 2000).astype(np.float32)
        ytest1 = np.zeros(G.output_size, dtype=np.float32)
        t1_cpu = time.time()
        for i in range(10):
            ytest1 = G.forward(xtest1, ytest1)
        t2_cpu = time.time()

        xtest = gpuarray.to_gpu(xtest1.astype(np.float32))
        ytest = gpuarray.to_gpu(ytest1.astype(np.float32))
        t1_gpu = time.time()
        for i in range(10):
            ytest = G.forward_cuda(xtest, ytest)
        t2_gpu = time.time()

        t_cpu = t2_cpu - t1_cpu
        t_gpu = t2_gpu - t1_gpu
        logging.info("Forward timing: cpu=%.2f ms gpu=%.2f ms factor=%.3f" %
                     (t_cpu, t_gpu, t_gpu / t_cpu))
        self.assertTrue(t_gpu < t_cpu)

        t1_cpu = time.time()
        for i in range(10):
            xtest1 = G.adjoint(ytest1, xtest1)
        t2_cpu = time.time()

        t1_gpu = time.time()
        for i in range(10):
            xtest = G.adjoint_cuda(ytest, xtest)
        t2_gpu = time.time()

        t_cpu = t2_cpu - t1_cpu
        t_gpu = t2_gpu - t1_gpu
        logging.info("Adjoint timing: cpu=%.2f ms gpu=%.2f ms factor=%.3f" %
                     (t_cpu, t_gpu, t_gpu / t_cpu))
        self.assertTrue(t_gpu < t_cpu)
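
Note that time.time() deltas over 10 iterations are seconds for the whole loop, not milliseconds. A small sketch of a helper that reports per-call milliseconds instead (for trustworthy GPU numbers the CUDA stream would additionally have to be synchronized before the clock is read):

import time

def time_ms_per_call(fn, n=10):
    # average wall-clock milliseconds per call of fn()
    t0 = time.perf_counter()
    for _ in range(n):
        fn()
    t1 = time.perf_counter()
    return (t1 - t0) * 1000.0 / n

# e.g. cpu_ms = time_ms_per_call(lambda: G.forward(xtest1, ytest1))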
Example #4
    def _generic_check_adjoint(self,
                               f,
                               inshape,
                               outshape,
                               s,
                               ntests=50,
                               eps=1e-5,
                               verbose=False,
                               in_out_sample=None):
        """
        Generic tests used for all comp graph tests on a parametrizable function f
        """
        x = Variable(inshape)
        func = f(x)
        if not isinstance(func, tuple):
            func = (func, )
        G = CompGraph(vstack(func))

        nin = functools.reduce(lambda x, y: x * y, inshape, 1)
        nout = functools.reduce(lambda x, y: x * y, outshape, 1)

        if in_out_sample is not None:
            # check against the given in/out samples
            x1 = in_out_sample[0]  # forward in
            y1s = in_out_sample[1]  # forward out
            y2 = in_out_sample[2]  # adjoint in
            x2s = in_out_sample[3]  # adjoint out

            y1a = G.forward_cuda(gpuarray.to_gpu(x1.astype(np.float32)),
                                 gpuarray.to_gpu(y1s.astype(
                                     np.float32))).get()
            #print(y1s)
            #print(y1a)
            self.assertTrue(np.amax(np.abs(y1a - y1s)) < eps)

            x2a = G.adjoint_cuda(gpuarray.to_gpu(y2.astype(np.float32)),
                                 gpuarray.to_gpu(x2s.astype(
                                     np.float32))).get()
            self.assertTrue(np.amax(np.abs(x2a - x2s)) < eps)

        # check with random data that the forward and adjoint operators are consistent
        maxerr = 0.0
        random.seed(0)  # make tests reproducible
        for tidx in range(ntests):
            x1 = random.rand(nin).astype(np.float32)
            y2 = random.rand(nout).astype(np.float32)
            y1 = np.zeros(nout, dtype=np.float32)
            x2 = np.zeros(nin, dtype=np.float32)

            if verbose:
                print("forward: ", end="")
            y1 = G.forward_cuda(gpuarray.to_gpu(x1),
                                gpuarray.to_gpu(y1),
                                printt=verbose).get()
            if verbose:
                print("adjoint: ", end="")
            x2 = G.adjoint_cuda(gpuarray.to_gpu(y2),
                                gpuarray.to_gpu(x2),
                                printt=verbose).get()

            self.assertTrue(not np.all(y1 == 0) and not np.all(x2 == 0))

            y1o = G.forward(x1, y1.copy())
            x2o = G.adjoint(y2, x2.copy())
            erro = abs(
                np.dot(x1.flatten().astype(np.float64),
                       x2o.flatten().astype(np.float64)) -
                np.dot(y1o.flatten().astype(np.float64),
                       y2.flatten().astype(np.float64)))

            err = abs(
                np.dot(x1.flatten().astype(np.float64),
                       x2.flatten().astype(np.float64)) -
                np.dot(y1.flatten().astype(np.float64),
                       y2.flatten().astype(np.float64)))
            if err > maxerr:
                maxerr = err
            if verbose and err > eps:
                print("forward CUDA code:")
                print(G.cuda_forward_subgraphs.cuda_code)
                print("backward CUDA code:")
                print(G.cuda_adjoint_subgraphs.cuda_code)
                print("x1\n", np.reshape(x1, inshape))
                print("y1\n", np.reshape(y1, outshape))
                print("y1o\n", np.reshape(y1o, outshape))
                print("y2\n", np.reshape(y2, outshape))
                print("x2\n", np.reshape(x2, inshape))
                print("x2o\n", np.reshape(x2o, inshape))
                print("(%d) Adjoint test (%s): gpu: %f, nogpu: %f" %
                      (tidx, s, err, erro))
                print("max(abs(y1-y1o)): %f" % (np.amax(np.abs(y1 - y1o))))
                print("max(abs(x2-x2o)): %f" % (np.amax(np.abs(x2 - x2o))))
            self.assertTrue(err <= eps)
        if verbose:
            print("%s passed %d tests. Max adjoint test error: %f" %
                  (s, ntests, maxerr))
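
For context, the dot-product check above rests on the defining property of the adjoint: for a linear operator G, <Gx, y> = <x, G^T y> for all x and y, so the two inner products computed per test must agree up to numerical error. A minimal NumPy sketch with an explicit matrix standing in for the comp graph:

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((9, 25))   # toy linear operator (outshape x inshape)
x = rng.standard_normal(25)
y = rng.standard_normal(9)

# <A x, y> must equal <x, A^T y>; a large gap signals an inconsistent adjoint
err = abs(np.dot(A @ x, y) - np.dot(x, A.T @ y))
assert err < 1e-10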