Example #1
def test_gemv_dot_strides():
    # Reported in https://github.com/Theano/Theano/issues/6142
    xv = rand(5)
    yv = rand(5, 1)
    x = gpuarray_shared_constructor(xv)
    y = gpuarray_shared_constructor(yv, broadcastable=(False, True))
    f = theano.function([], tensor.dot(x, y[::-1]), mode=mode_with_gpu)
    out = f()
    utt.assert_allclose(out, np.dot(xv, yv[::-1]))
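What the regression above exercises is a GEMV call whose second operand is a negative-stride view (`y[::-1]`). A plain-NumPy sketch of the same expectation, with no Theano involved and names chosen only for illustration:

import numpy as np

xv = np.random.rand(5)
yv = np.random.rand(5, 1)

rev = yv[::-1]                       # reversed view: negative stride, no copy
assert rev.strides[0] == -yv.strides[0]

# the GPU kernel is expected to give the same answer as a dot with a contiguous copy
np.testing.assert_allclose(np.dot(xv, rev), np.dot(xv, rev.copy()))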
Example #2
    def test_gpu_singular_values(self):
        A = aesara.tensor.fmatrix("A")
        f_cpu = aesara.function(
            [A], aesara.tensor.nlinalg.svd(A, compute_uv=False), mode=mode_without_gpu
        )
        f_gpu = aesara.function([A], gpu_svd(A, compute_uv=False), mode=mode_with_gpu)

        A_val = rand(50, 100).astype("float32")
        utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))

        A_val = rand(100, 50).astype("float32")
        utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
Example #3
    def check_gpu_eigh(self, N, UPLO="L", compute_v=True, rtol=None, atol=None):
        A = rand(N, N).astype("float32")
        A = np.dot(A.T, A)
        d_np, v_np = np.linalg.eigh(A, UPLO=UPLO)
        if compute_v:
            d_gpu, v_gpu = self.run_gpu_eigh(A, UPLO=UPLO, compute_v=compute_v)
        else:
            d_gpu = self.run_gpu_eigh(A, UPLO=UPLO, compute_v=False)
        utt.assert_allclose(d_np, d_gpu, rtol=rtol, atol=atol)
        if compute_v:
            utt.assert_allclose(np.eye(N),
                                np.dot(v_gpu, v_gpu.T),
                                rtol=rtol,
                                atol=atol)
            D_m = np.zeros_like(A)
            np.fill_diagonal(D_m, d_gpu)
            utt.assert_allclose(A,
                                np.dot(np.dot(v_gpu, D_m), v_gpu.T),
                                rtol=rtol,
                                atol=atol)
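The two assert_allclose calls at the end encode the standard eigendecomposition identities: the eigenvector matrix is orthogonal (dot(V, V.T) == I) and A == dot(dot(V, diag(d)), V.T). A self-contained NumPy sketch of the same checks, with an arbitrary size:

import numpy as np

N = 4
A = np.random.rand(N, N).astype("float32")
A = np.dot(A.T, A)                  # symmetric input, as in the test above

d, V = np.linalg.eigh(A)
np.testing.assert_allclose(np.dot(V, V.T), np.eye(N), atol=1e-4)               # orthogonal eigenvectors
np.testing.assert_allclose(np.dot(np.dot(V, np.diag(d)), V.T), A, atol=1e-4)   # reconstruction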
Example #4
    def rand_symmetric(self, N):
        A = rand(N, N).astype("float32")
        # ensure that eigenvalues are not too small, which sometimes causes the
        # MAGMA Cholesky to fail due to the GPU's limited numerical precision
        D, W = np.linalg.eigh(A)
        D[D < 1] = 1
        V_m = np.zeros_like(A)
        np.fill_diagonal(V_m, D)
        return np.dot(np.dot(W.T, V_m), W)
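Clipping the eigenvalues to at least 1 before reassembling the matrix yields a well-conditioned symmetric positive definite result, which is what keeps a float32 GPU Cholesky from failing. A quick NumPy check of that property (illustrative only, not part of the test suite):

import numpy as np

N = 8
A = np.random.rand(N, N).astype("float32")
D, W = np.linalg.eigh(A)                    # eigh only reads the lower triangle
D[D < 1] = 1
S = np.dot(np.dot(W.T, np.diag(D)), W)

assert np.allclose(S, S.T, atol=1e-5)       # symmetric
assert np.linalg.eigvalsh(S).min() >= 0.99  # eigenvalues >= 1 up to rounding
np.linalg.cholesky(S.astype("float64"))     # succeeds because S is positive definite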
Example #5
    def check_gpu_qr(self, M, N, complete=True, rtol=None, atol=None):
        A = rand(M, N).astype("float32")
        if complete:
            Q_gpu, R_gpu = self.run_gpu_qr(A, complete=complete)
        else:
            R_gpu = self.run_gpu_qr(A, complete=complete)

        Q_np, R_np = np.linalg.qr(A, mode="reduced")
        utt.assert_allclose(R_np, R_gpu, rtol=rtol, atol=atol)
        if complete:
            utt.assert_allclose(Q_np, Q_gpu, rtol=rtol, atol=atol)
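check_gpu_qr compares against NumPy's reduced QR, whose defining properties are orthonormal columns in Q and A == dot(Q, R). A stand-alone NumPy version of the same comparison, with sizes picked arbitrarily:

import numpy as np

M, N = 6, 4
A = np.random.rand(M, N).astype("float32")

Q, R = np.linalg.qr(A, mode="reduced")      # Q is M x min(M, N), R is min(M, N) x N
np.testing.assert_allclose(np.dot(Q.T, Q), np.eye(min(M, N)), atol=1e-5)
np.testing.assert_allclose(np.dot(Q, R), A, atol=1e-5)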
Example #6
    def test_pool_py_interface(self):
        shp = (2, 2, 2, 2)
        inp = aesara.shared(rand(*shp), "a")
        inp = aet.as_tensor_variable(inp)
        with pytest.raises(ValueError):
            # test when pad >= ws
            ds_op = GpuPool(ignore_border=True, ndim=2)
            ds_op(inp, [2, 2], pad=[3, 3])
        with pytest.raises(ValueError):
            # test when ignore_border is False and pad > 0
            ds_op = GpuPool(ignore_border=False, ndim=2)
            ds_op(inp, [2, 2], pad=[1, 1])
Example #7
    def test_pool_big_ws(self):
        gpu_mode = mode_with_gpu.excluding("cudnn")
        gpu_mode.check_py_code = False

        shp = (2, 2, 2, 2)
        inp = aesara.shared(rand(*shp), "a")
        inp = aet.as_tensor_variable(inp)
        ds_op = GpuPool(ignore_border=False, mode="average_exc_pad", ndim=2)
        pad = aet.as_tensor_variable([0, 0])
        f = aesara.function(
            [], ds_op(inp, [5, 5], stride=[1, 1], pad=pad), mode=gpu_mode
        )
        f()
Example #8
    def test_pool_c_interface(self):
        gpu_mode = mode_with_gpu.excluding("cudnn")
        gpu_mode.check_py_code = False

        shp = (2, 2, 2, 2)
        inp = aesara.shared(rand(*shp), "a")
        inp = aet.as_tensor_variable(inp)
        with pytest.raises(ValueError):
            # test when ignore_border is False and pad > 0
            ds_op = GpuPool(ignore_border=False, ndim=2)
            pad = aet.as_tensor_variable([1, 1])
            f = aesara.function([], ds_op(inp, [2, 2], pad=pad), mode=gpu_mode)
            f()
Example #9
def test_float16():
    # gemv (gemm called)
    float16_data = [
        rand(3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name)
        for val in float16_data
    ]
    o = gemv(*float16_shared)
    f = theano.function([], o, mode=mode_with_gpu)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)
    topo = f.maker.fgraph.toposort()
    assert any([isinstance(n.op, GpuGemm) for n in topo])

    # gemm
    float16_data = [
        rand(3, 3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3, 3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name)
        for val in float16_data
    ]
    o = gpugemm_no_inplace(*float16_shared)
    f = theano.function([], o)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)

    # dot22
    float16_data = [rand(3, 3).astype("float16"), rand(3, 3).astype("float16")]

    float16_shared = [gpuarray_shared_constructor(val) for val in float16_data]
    o = gpu_dot22(*float16_shared)
    f = theano.function([], o)
    x, y = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), np.dot(x, y))
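All three cases above check the same BLAS contracts against NumPy: gemv and gemm compute alpha * dot(A, x) + beta * y (with a vector or a matrix as the second operand), and dot22 is a plain matrix product. The reference computations, written out in NumPy with illustrative values:

import numpy as np

alpha = np.float32(1)
beta = np.float32(0.5)

# gemv reference: out = alpha * dot(A, x) + beta * y
A = np.random.rand(3, 3).astype("float16")
x = np.random.rand(3).astype("float16")
y = np.random.rand(3).astype("float16")
ref_gemv = alpha * np.dot(A, x) + beta * y

# gemm reference: same formula with matrix operands
B = np.random.rand(3, 3).astype("float16")
C = np.random.rand(3, 3).astype("float16")
ref_gemm = alpha * np.dot(A, B) + beta * C

# dot22 reference: a plain matrix-matrix product
ref_dot22 = np.dot(A, B)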
Example #10
    def test_gpu_svd_tall(self):
        A = rand(50, 100).astype("float32")
        M, N = A.shape

        U, S, VT = self.run_gpu_svd(A)
        self.assert_column_orthonormal(U)
        self.assert_column_orthonormal(VT.T)
        self.check_svd(A, U, S, VT)

        U, S, VT = self.run_gpu_svd(A, full_matrices=False)
        assert U.shape[1] == min(M, N)
        self.assert_column_orthonormal(U)
        assert VT.shape[0] == min(M, N)
        self.assert_column_orthonormal(VT.T)
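assert_column_orthonormal is a helper on the test class (not shown here); the shape expectations and the orthonormality it presumably verifies follow directly from np.linalg.svd. A NumPy-only sketch of both checks for a 50 x 100 matrix, matching the test above:

import numpy as np

A = np.random.rand(50, 100).astype("float32")
M, N = A.shape

U, S, VT = np.linalg.svd(A, full_matrices=False)    # reduced SVD
assert U.shape == (M, min(M, N)) and VT.shape == (min(M, N), N)
np.testing.assert_allclose(np.dot(U.T, U), np.eye(min(M, N)), atol=1e-4)    # columns of U
np.testing.assert_allclose(np.dot(VT, VT.T), np.eye(min(M, N)), atol=1e-4)  # columns of VT.T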
Example #11
def test_pool2d():
    shps = [
        (1, 12),
        (1, 1, 12),
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
        (3, 2, 16, 16, 16),
        (3, 2, 6, 6, 6, 5),
        (3, 2, 6, 6, 6, 5, 7),
    ]

    np.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2), (3, 2), (1, 1)
    test_st = (2, 2), (3, 2), (1, 1)
    test_mode = ["max", "sum", "average_inc_pad", "average_exc_pad"]

    ref_mode = copy.copy(mode_without_gpu)
    ref_mode.check_py_code = False
    gpu_mode = mode_with_gpu.excluding("cudnn")
    gpu_mode.check_py_code = False

    for shp in shps:
        for mode, ws, st in itertools.product(test_mode, test_ws, test_st):
            if ws[0] > shp[-2] or ws[1] > shp[-1]:
                continue
            for ignore_border, pad in zip((True, False), [(1, 1), (0, 0)]):
                if pad[0] >= ws[0] or pad[1] >= ws[1]:
                    continue
                if mode == "average_exc_pad" and (pad[0] > 0 or pad[1] > 0):
                    continue
                # print('test_pool2d', shp, ws, st, pad, mode, ignore_border)
                ds_op = Pool(ndim=len(ws), mode=mode, ignore_border=ignore_border)

                a = aesara.shared(rand(*shp), "a")
                a_pooled = ds_op(aet.as_tensor_variable(a), ws, st, pad)

                f = aesara.function([], a_pooled, mode=gpu_mode)
                f2 = aesara.function([], a_pooled, mode=ref_mode)

                assert any(
                    [isinstance(node.op, GpuPool) for node in f.maker.fgraph.toposort()]
                )
                assert any(
                    [isinstance(node.op, Pool) for node in f2.maker.fgraph.toposort()]
                )
                assert np.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)

                a_pooled_grad = grad(a_pooled.sum(), a)

                g = aesara.function([], a_pooled_grad, mode=gpu_mode)
                g2 = aesara.function([], a_pooled_grad, mode=ref_mode)

                if mode == "max":
                    gop = GpuMaxPoolGrad
                    gop2 = MaxPoolGrad
                else:
                    gop = GpuAveragePoolGrad
                    gop2 = AveragePoolGrad
                assert any(
                    [isinstance(node.op, gop) for node in g.maker.fgraph.toposort()]
                )
                assert any(
                    [isinstance(node.op, gop2) for node in g2.maker.fgraph.toposort()]
                )

                assert np.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)

                # test rop and grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
                if mode != "max":
                    continue

                ea = aesara.shared(rand(*shp), "ea")

                gr = aesara.function([], Rop(a_pooled, a, ea), mode=gpu_mode)
                gr2 = aesara.function([], Rop(a_pooled, a, ea), mode=ref_mode)

                assert any(
                    [
                        isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                        for node in gr.maker.fgraph.toposort()
                    ]
                )
                assert any(
                    [
                        isinstance(node.op, DownsampleFactorMaxGradGrad)
                        for node in gr2.maker.fgraph.toposort()
                    ]
                )
                assert np.allclose(gr(), gr2()), (shp, ws, st, pad, mode, ignore_border)

                ggf = Lop(grad((a_pooled ** 2).sum(), a), a, a)

                gg = aesara.function([], ggf, mode=gpu_mode)
                gg2 = aesara.function([], ggf, mode=ref_mode)

                assert any(
                    [
                        isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                        for node in gg.maker.fgraph.toposort()
                    ]
                )
                assert any(
                    [
                        isinstance(node.op, DownsampleFactorMaxGradGrad)
                        for node in gg2.maker.fgraph.toposort()
                    ]
                )
                assert np.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
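For a sense of what the CPU/GPU comparison above is checking, here is a minimal NumPy reference for non-overlapping max pooling over the trailing two axes, with stride equal to the window and no padding (a simplified sketch only, not the full semantics of the Pool Op):

import numpy as np

def max_pool_2d(a, ws):
    # non-overlapping max pooling over the last two dimensions;
    # trailing elements that do not fill a complete window are dropped
    h, w = a.shape[-2] // ws[0], a.shape[-1] // ws[1]
    trimmed = a[..., : h * ws[0], : w * ws[1]]
    blocks = trimmed.reshape(a.shape[:-2] + (h, ws[0], w, ws[1]))
    return blocks.max(axis=(-3, -1))

a = np.random.rand(2, 2, 6, 6)
assert max_pool_2d(a, (2, 2)).shape == (2, 2, 3, 3)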
Example #12
from theano.gpuarray.blas import (  # opening of the truncated import; source module assumed
    gpugemm_no_inplace,
    gpugemmbatch_inplace,
    gpugemv_inplace,
    gpugemv_no_inplace,
    gpuger_inplace,
    gpuger_no_inplace,
)
from theano.tensor.blas import _dot22, batched_dot, gemm_inplace, gemv, gemv_inplace

TestGpuGemv = makeTester(
    "GpuGemvTester",
    op=gemv_inplace,
    gpu_op=gpugemv_inplace,
    # It doesn't support float16
    cases=dict(
        dot_vv=[rand(1), 1.0, rand(1, 2),
                rand(2), 0.0],
        dot_vm=[rand(3), 1.0, rand(3, 2),
                rand(2), 0.0],
        float32=[
            rand(3).astype("float32"),
            np.float32(1),
            rand(3, 2).astype("float32"),
            rand(2).astype("float32"),
            np.float32(0),
        ],
        float64=[
            rand(3).astype("float64"),
            np.float64(1),
            rand(3, 2).astype("float64"),
            rand(2).astype("float64"),