Example #1
class TestGroupGpuCorr2d(TestGroupedConvNoOptim):
    mode = mode_with_gpu.excluding("cudnn")
    conv_op = GpuCorrMM
    conv_gradw_op = GpuCorrMM_gradWeights
    conv_gradi_op = GpuCorrMM_gradInputs
    flip_filter = True
    is_dnn = False
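All of these examples rely on the same facility: calling .excluding(...) on a compilation mode returns a copy of that mode with the named optimizations disabled, and the tests then compile a function and walk fn.maker.fgraph.toposort() to confirm which Ops ended up in the graph. A minimal sketch of that pattern (the graph and the excluded optimization are illustrative only, not taken from any test above):

import aesara
import aesara.tensor as aet
from tests.gpuarray.config import mode_with_gpu

# Copy of the GPU mode with the cuDNN rewrites disabled.
mode = mode_with_gpu.excluding("cudnn")

x = aet.matrix("x", dtype="float32")
f = aesara.function([x], x.sum(axis=0), mode=mode)

# Inspect which Ops were selected for the compiled graph.
for node in f.maker.fgraph.toposort():
    print(type(node.op).__name__)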
Example #2
    def setup_method(self):
        self.mode = mode_with_gpu.excluding("constant_folding")
        self.gemv_op = gpu_sparse_block_gemv
        self.outer_op = gpu_sparse_block_outer
        self.gemv_class = GpuSparseBlockGemv
        self.outer_class = GpuSparseBlockOuter
        super().setup_method()
Example #3
    def run_gpu_cholesky(self, A_val, lower=True):
        A = aesara.tensor.fmatrix("A")
        f = aesara.function(
            [A],
            GpuMagmaCholesky(lower=lower)(A),
            mode=mode_with_gpu.excluding("cusolver"),
        )
        return f(A_val)
Example #4
    def test_gpu_cholesky_opt(self):
        A = aesara.tensor.matrix("A", dtype="float32")
        fn = aesara.function([A], cholesky(A), mode=mode_with_gpu.excluding("cusolver"))
        assert any(
            [
                isinstance(node.op, GpuMagmaCholesky)
                for node in fn.maker.fgraph.toposort()
            ]
        )
Example #5
    def test_pool_c_interface(self):
        gpu_mode = mode_with_gpu.excluding("cudnn")
        gpu_mode.check_py_code = False

        shp = (2, 2, 2, 2)
        inp = aesara.shared(rand(*shp), "a")
        inp = aet.as_tensor_variable(inp)
        with pytest.raises(ValueError):
            # ignore_border=False combined with pad > 0 should raise a ValueError
            ds_op = GpuPool(ignore_border=False, ndim=2)
            pad = aet.as_tensor_variable([1, 1])
            f = aesara.function([], ds_op(inp, [2, 2], pad=pad), mode=gpu_mode)
            f()
Example #6
    def test_pool_big_ws(self):
        gpu_mode = mode_with_gpu.excluding("cudnn")
        gpu_mode.check_py_code = False

        shp = (2, 2, 2, 2)
        inp = aesara.shared(rand(*shp), "a")
        inp = aet.as_tensor_variable(inp)
        ds_op = GpuPool(ignore_border=False, mode="average_exc_pad", ndim=2)
        pad = aet.as_tensor_variable([0, 0])
        f = aesara.function(
            [], ds_op(inp, [5, 5], stride=[1, 1], pad=pad), mode=gpu_mode
        )
        f()
Example #7
    def setup_method(self):
        self.mode = mode_with_gpu.excluding("constant_folding")
        self.join_op = GpuJoin()
        self.split_op_class = GpuSplit
        # Use join instead of MakeVector since there is no MakeVector on GPU
        self.make_vector_op = GpuJoin()
        # this is to avoid errors with limited devices
        self.floatX = "float32"
        self.hide_error = aesara.config.mode not in ["DebugMode", "DEBUG_MODE"]

        def shared(x, **kwargs):
            return gpuarray_shared_constructor(x, target=test_ctx_name, **kwargs)

        self.shared = shared
Example #8
def test_transfer_gpu_gpu():
    g = GpuArrayType(dtype="float32",
                     broadcastable=(False, False),
                     context_name=test_ctx_name)()

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))
    mode = mode_with_gpu.excluding("cut_gpua_host_transfers",
                                   "local_cut_gpua_host_gpua")
    f = theano.function([g], GpuToGpu(test_ctx_name)(g), mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuToGpu)
    fv = f(gv)
    assert GpuArrayType.values_eq(fv, gv)
Example #9
    def test_magma_opt_float16(self):
        ops_to_gpu = [
            (MatrixInverse(), GpuMagmaMatrixInverse),
            (SVD(), GpuMagmaSVD),
            (QRFull(mode="reduced"), GpuMagmaQR),
            (QRIncomplete(mode="r"), GpuMagmaQR),
            # TODO: add support for float16 to Eigh numpy
            # (Eigh(), GpuMagmaEigh),
            (Cholesky(), GpuMagmaCholesky),
        ]
        for op, gpu_op in ops_to_gpu:
            A = aesara.tensor.matrix("A", dtype="float16")
            fn = aesara.function([A], op(A), mode=mode_with_gpu.excluding("cusolver"))
            assert any(
                [isinstance(node.op, gpu_op) for node in fn.maker.fgraph.toposort()]
            )
Example #10
def test_shape():
    x = GpuArrayType(dtype="float32", broadcastable=[False, False, False])()
    v = gpuarray.zeros((3, 4, 5), dtype="float32", context=get_context(test_ctx_name))
    f = aesara.function([x], x.shape)
    topo = f.maker.fgraph.toposort()
    assert np.all(f(v) == (3, 4, 5))
    if aesara.config.mode != "FAST_COMPILE":
        assert len(topo) == 4
        assert isinstance(topo[0].op, tt.opt.Shape_i)
        assert isinstance(topo[1].op, tt.opt.Shape_i)
        assert isinstance(topo[2].op, tt.opt.Shape_i)
        assert isinstance(topo[3].op, tt.opt.MakeVector)
    mode = mode_with_gpu.excluding("local_shape_to_shape_i")
    f = aesara.function([x], x.shape, mode=mode)
    topo = f.maker.fgraph.toposort()
    assert np.all(f(v) == (3, 4, 5))
    assert len(topo) == 1
    assert isinstance(topo[0].op, tt.Shape)
Example #11
    def test_blocksparse_grad_merge(self):
        b = tensor.fmatrix()
        h = tensor.ftensor3()
        iIdx = tensor.lmatrix()
        oIdx = tensor.lmatrix()

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()
        W = gpuarray_shared_constructor(W_val, context=test_ctx_name)

        o = gpu_sparse_block_gemv(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
        gW = theano.grad(o.sum(), W)

        lr = np.asarray(0.05, dtype="float32")

        upd = W - lr * gW

        f1 = theano.function([h, iIdx, b, oIdx],
                             updates=[(W, upd)],
                             mode=mode_with_gpu)

        # Make sure the lr update was merged.
        assert isinstance(f1.maker.fgraph.outputs[0].owner.op,
                          GpuSparseBlockOuter)

        # Exclude the merge optimizations.
        mode = mode_with_gpu.excluding("local_merge_blocksparse_alpha")
        mode = mode.excluding("local_merge_blocksparse_output")

        f2 = theano.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)

        # Make sure the lr update is not merged.
        assert not isinstance(f2.maker.fgraph.outputs[0].owner.op,
                              GpuSparseBlockOuter)

        f2(h_val, iIdx_val, b_val, oIdx_val)
        W_ref = W.get_value()

        # reset the var
        W.set_value(W_val)
        f1(h_val, iIdx_val, b_val, oIdx_val)
        W_opt = W.get_value()

        utt.assert_allclose(W_ref, W_opt)
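Example #11 also demonstrates the reset-and-compare idiom for shared-variable updates: run one compiled version, record the shared value, reset the variable to its original value, run the other version, and check that the two results agree. A stripped-down sketch of just that idiom, with a plain gradient-style update standing in for the block-sparse one (the variable names and the trivial update are illustrative, not from the test):

import numpy as np
import aesara
import aesara.tensor as aet
from tests.gpuarray.config import mode_with_gpu

w_val = np.ones((3, 3), dtype="float32")
w = aesara.shared(w_val.copy(), "w")
x = aet.fmatrix("x")

lr = np.asarray(0.05, dtype="float32")  # cast, as in the test, to avoid a float64 upcast
upd = w - lr * x                        # stand-in for the real gradient update

f1 = aesara.function([x], updates=[(w, upd)], mode=mode_with_gpu)
f2 = aesara.function(
    [x],
    updates=[(w, upd)],
    mode=mode_with_gpu.excluding("local_merge_blocksparse_alpha"),
)

x_val = np.ones((3, 3), dtype="float32")

f2(x_val)
w_ref = w.get_value()

w.set_value(w_val)  # reset the shared variable before the second run
f1(x_val)
np.testing.assert_allclose(w_ref, w.get_value())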
Example #12
class TestGpuReduceDtype(test_elemwise.TestReduceDtype):
    mode = mode_with_gpu.excluding("local_cut_useless_reduce")

    # GpuDnnReduction doesn't cover all cases, but should cover some
    op = (GpuCAReduceCuda, GpuDnnReduction)
    # Currently we don't support reduction on 0 axis
    axes = [None, 0, 1, 1, [0], [1], [0, 1]]
    # We don't support complex dtype
    dtypes = [
        "int8",
        "int16",
        "int32",
        "int64",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "float32",
        "float64",
    ]

    def setup_method(self):
        if get_context(test_ctx_name).kind != b"cuda":
            pytest.skip("Cuda specific tests")
Example #13
import numpy as np

import tests.unittest_tools as utt
import theano
import theano.tensor as tt
from tests.gpuarray.config import mode_with_gpu, mode_without_gpu
from theano.gpuarray.nnet import (
    GpuCrossentropySoftmax1HotWithBiasDx,
    GpuCrossentropySoftmaxArgmax1HotWithBias,
    GpuSoftmax,
    GpuSoftmaxWithBias,
)
from theano.tensor.nnet import crossentropy_softmax_1hot_with_bias_dx

mode_wo_cudnn = mode_with_gpu.excluding("cudnn")


def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that we loop when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    y = tt.lvector("y")
Example #14
def test_pool2d():
    shps = [
        (1, 12),
        (1, 1, 12),
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
        (3, 2, 16, 16, 16),
        (3, 2, 6, 6, 6, 5),
        (3, 2, 6, 6, 6, 5, 7),
    ]

    np.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2), (3, 2), (1, 1)
    test_st = (2, 2), (3, 2), (1, 1)
    test_mode = ["max", "sum", "average_inc_pad", "average_exc_pad"]

    ref_mode = copy.copy(mode_without_gpu)
    ref_mode.check_py_code = False
    gpu_mode = mode_with_gpu.excluding("cudnn")
    gpu_mode.check_py_code = False

    for shp in shps:
        for mode, ws, st in itertools.product(test_mode, test_ws, test_st):
            if ws[0] > shp[-2] or ws[1] > shp[-1]:
                continue
            for ignore_border, pad in zip((True, False), [(1, 1), (0, 0)]):
                if pad[0] >= ws[0] or pad[1] >= ws[1]:
                    continue
                if mode == "average_exc_pad" and (pad[0] > 0 or pad[1] > 0):
                    continue
                # print('test_pool2d', shp, ws, st, pad, mode, ignore_border)
                ds_op = Pool(ndim=len(ws), mode=mode, ignore_border=ignore_border)

                a = aesara.shared(rand(*shp), "a")
                a_pooled = ds_op(aet.as_tensor_variable(a), ws, st, pad)

                f = aesara.function([], a_pooled, mode=gpu_mode)
                f2 = aesara.function([], a_pooled, mode=ref_mode)

                assert any(
                    [isinstance(node.op, GpuPool) for node in f.maker.fgraph.toposort()]
                )
                assert any(
                    [isinstance(node.op, Pool) for node in f2.maker.fgraph.toposort()]
                )
                assert np.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)

                a_pooled_grad = grad(a_pooled.sum(), a)

                g = aesara.function([], a_pooled_grad, mode=gpu_mode)
                g2 = aesara.function([], a_pooled_grad, mode=ref_mode)

                if mode == "max":
                    gop = GpuMaxPoolGrad
                    gop2 = MaxPoolGrad
                else:
                    gop = GpuAveragePoolGrad
                    gop2 = AveragePoolGrad
                assert any(
                    [isinstance(node.op, gop) for node in g.maker.fgraph.toposort()]
                )
                assert any(
                    [isinstance(node.op, gop2) for node in g2.maker.fgraph.toposort()]
                )

                assert np.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)

                # test rop and grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
                if mode != "max":
                    continue

                ea = aesara.shared(rand(*shp), "ea")

                gr = aesara.function([], Rop(a_pooled, a, ea), mode=gpu_mode)
                gr2 = aesara.function([], Rop(a_pooled, a, ea), mode=ref_mode)

                assert any(
                    [
                        isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                        for node in gr.maker.fgraph.toposort()
                    ]
                )
                assert any(
                    [
                        isinstance(node.op, DownsampleFactorMaxGradGrad)
                        for node in gr2.maker.fgraph.toposort()
                    ]
                )
                assert np.allclose(gr(), gr2()), (shp, ws, st, pad, mode, ignore_border)

                ggf = Lop(grad((a_pooled ** 2).sum(), a), a, a)

                gg = aesara.function([], ggf, mode=gpu_mode)
                gg2 = aesara.function([], ggf, mode=ref_mode)

                assert any(
                    [
                        isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                        for node in gg.maker.fgraph.toposort()
                    ]
                )
                assert any(
                    [
                        isinstance(node.op, DownsampleFactorMaxGradGrad)
                        for node in gg2.maker.fgraph.toposort()
                    ]
                )
                assert np.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
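test_pool2d above is the fullest instance of the reference-comparison pattern: the same symbolic expression is compiled once with the cuDNN-excluded GPU mode and once with the CPU reference mode, each graph is checked for the expected Op, and the numerical outputs are compared. A stripped-down sketch of that structure, using a simple elementwise expression instead of pooling so it stays self-contained (the modes come from tests.gpuarray.config as in Example #13):

import numpy as np
import aesara
import aesara.tensor as aet
from tests.gpuarray.config import mode_with_gpu, mode_without_gpu

gpu_mode = mode_with_gpu.excluding("cudnn")  # GPU mode without the cuDNN rewrites
ref_mode = mode_without_gpu                  # CPU reference mode

a = aesara.shared(np.random.rand(4, 4).astype("float32"), "a")
expr = (aet.as_tensor_variable(a) ** 2).sum()

f_gpu = aesara.function([], expr, mode=gpu_mode)
f_ref = aesara.function([], expr, mode=ref_mode)

# The two functions should agree numerically; only the chosen Ops differ.
assert np.allclose(f_gpu(), f_ref())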
Example #15
class TestGroupGpuCorr3d(TestGroupedConv3dNoOptim):
    mode = mode_with_gpu.excluding("cudnn")
    conv_op = GpuCorr3dMM
    conv_gradw_op = GpuCorr3dMM_gradWeights
    conv_gradi_op = GpuCorr3dMM_gradInputs
Example #16
    def setup_class(cls):
        super().setup_class()
        cls.shared = staticmethod(gpuarray_shared_constructor)
        cls.mode = mode_with_gpu.excluding("cudnn")
Example #17
class TestFusion(test_opt.TestFusion):
    mode = mode_with_gpu.excluding("local_dnn_reduction")
    _shared = staticmethod(gpuarray_shared_constructor)
    topo_exclude = (GpuFromHost, HostFromGpu)
Example #18
    def test_one_sequence_one_output_weights_gpu1(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = fvector("u")
        x0 = fscalar("x0")
        W_in = fscalar("win")
        W = fscalar("w")

        mode = mode_with_gpu.excluding("InputToGpuOptimizer")
        output, updates = scan(
            f_rnn,
            u,
            x0,
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=mode,
        )

        output = GpuFromHost(test_ctx_name)(output)
        f2 = aesara.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=mode,
        )

        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        v_u = np.asarray(v_u, dtype="float32")
        v_x0 = np.asarray(v_x0, dtype="float32")
        W = np.asarray(W, dtype="float32")
        W_in = np.asarray(W_in, dtype="float32")

        # compute the output in numpy
        v_out = np.zeros((4, ))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in range(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W

        aesara_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(aesara_values, v_out)

        # TO DEL
        topo = f2.maker.fgraph.toposort()
        scan_node = [
            node for node in topo if isinstance(node.op, scan.op.Scan)
        ]
        assert len(scan_node) == 1
        scan_node = scan_node[0]

        topo = f2.maker.fgraph.toposort()
        assert sum([isinstance(node.op, HostFromGpu) for node in topo]) == 0
        assert sum([isinstance(node.op, GpuFromHost) for node in topo]) == 4

        scan_node = [
            node for node in topo if isinstance(node.op, scan.op.Scan)
        ]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # check that there is no gpu transfer in the inner loop.
        assert any(
            [isinstance(node.op, GpuElemwise) for node in scan_node_topo])
        assert not any(
            [isinstance(node.op, HostFromGpu) for node in scan_node_topo])
        assert not any(
            [isinstance(node.op, GpuFromHost) for node in scan_node_topo])