Example #1
def test_gemv_dot_strides():
    # Regression test for https://github.com/Theano/Theano/issues/6142:
    # a GEMV whose operand has negative strides (from the y[::-1] slice)
    # must match NumPy's result.
    xv = rand(5)
    yv = rand(5, 1)
    x = gpuarray_shared_constructor(xv)
    y = gpuarray_shared_constructor(yv, broadcastable=(False, True))
    f = aesara.function([], dot(x, y[::-1]), mode=mode_with_gpu)
    out = f()
    utt.assert_allclose(out, np.dot(xv, yv[::-1]))
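The snippets on this page are excerpted from the Aesara gpuarray test suite and omit their module-level imports. The sketch below collects the shared names they rely on; the module paths and the `rand` helper are assumptions based on that test layout rather than lines taken from the source.

# Hypothetical import header for the snippets on this page; the module
# paths are assumed from the Aesara/Theano gpuarray test layout and may
# differ in your checkout.
import time

import numpy as np

import aesara
from aesara import function
from aesara.tensor import dot
from aesara.gpuarray.type import gpuarray_shared_constructor

from tests import unittest_tools as utt
from tests.gpuarray.config import mode_with_gpu, test_ctx_name

# The BLAS and linalg ops used below (gemv, GpuGemm, gpugemm_no_inplace,
# gpu_dot22, GpuMagmaCholesky, GpuMagmaMatrixInverse) come from the
# gpuarray blas/linalg modules; their exact import lines are not shown
# in the snippets.


def rand(*shape):
    # Hypothetical stand-in for the test suite's random-array helper.
    return np.random.random(shape).astype("float32")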
Example #2
def test_float16():
    # gemv (on the GPU this is lowered to gemm, hence the GpuGemm check below)
    float16_data = [
        rand(3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name)
        for val in float16_data
    ]
    o = gemv(*float16_shared)
    f = aesara.function([], o, mode=mode_with_gpu)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)
    topo = f.maker.fgraph.toposort()
    assert any(isinstance(n.op, GpuGemm) for n in topo)

    # gemm
    float16_data = [
        rand(3, 3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3, 3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name)
        for val in float16_data
    ]
    o = gpugemm_no_inplace(*float16_shared)
    f = aesara.function([], o)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)

    # dot22
    float16_data = [rand(3, 3).astype("float16"), rand(3, 3).astype("float16")]

    float16_shared = [gpuarray_shared_constructor(val) for val in float16_data]
    o = gpu_dot22(*float16_shared)
    f = aesara.function([], o)
    x, y = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), np.dot(x, y))
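For reference, the asserts in this example check the standard BLAS-style updates, using the same `(output, alpha, A, operand, beta)` argument order as the ops above. A pure NumPy sketch of those updates (hypothetical helper names, not part of the test suite):

def ref_gemv(y, alpha, A, x, beta):
    # gemv update: alpha * (A @ x) + beta * y, with A (m, n), x (n,), y (m,)
    return alpha * np.dot(A, x) + beta * y


def ref_gemm(C, alpha, A, B, beta):
    # gemm update: alpha * (A @ B) + beta * C, with conforming 2-D operands
    return alpha * np.dot(A, B) + beta * C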
Example #3
def test_gpu_cholesky_inplace(self):
    # Check that GpuMagmaCholesky runs in place and that the returned
    # factor L reconstructs the original matrix via L @ L.T.
    A = self.rand_symmetric(1000)
    A_gpu = gpuarray_shared_constructor(A)
    A_copy = A_gpu.get_value()
    C = GpuMagmaCholesky()(A_gpu)
    fn = aesara.function([], C, mode=mode_with_gpu, updates=[(A_gpu, C)])
    assert any(
        node.op.inplace
        for node in fn.maker.fgraph.toposort()
        if isinstance(node.op, GpuMagmaCholesky)
    )
    fn()
    L = A_gpu.get_value()
    utt.assert_allclose(np.dot(L, L.T), A_copy, atol=1e-3)
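The identity being asserted can also be checked on the CPU. A small NumPy sketch of the same property, using a hypothetical symmetric positive-definite matrix in place of `self.rand_symmetric`:

rng = np.random.default_rng(0)
A = rng.random((100, 100)).astype("float32")
A = np.dot(A, A.T) + 100 * np.eye(100, dtype="float32")  # symmetric positive definite
# np.linalg.cholesky returns the lower-triangular factor L with L @ L.T == A,
# which is the reconstruction the GPU test asserts for GpuMagmaCholesky.
L_ref = np.linalg.cholesky(A)
assert np.allclose(np.dot(L_ref, L_ref.T), A, atol=1e-3)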
Example #4
def test_gpu_matrix_inverse_inplace(self):
    # Check that GpuMagmaMatrixInverse runs in place: after the update,
    # A_inv @ A should be (approximately) the identity matrix.
    N = 1000
    test_rng = np.random.default_rng(seed=1)
    A_val_gpu = gpuarray_shared_constructor(
        test_rng.random((N, N)).astype("float32") * 2 - 1
    )
    A_val_copy = A_val_gpu.get_value()
    A_val_gpu_inv = GpuMagmaMatrixInverse()(A_val_gpu)
    fn = aesara.function(
        [], A_val_gpu_inv, mode=mode_with_gpu, updates=[(A_val_gpu, A_val_gpu_inv)]
    )
    assert any(
        node.op.inplace
        for node in fn.maker.fgraph.toposort()
        if isinstance(node.op, GpuMagmaMatrixInverse)
    )
    fn()
    utt.assert_allclose(
        np.eye(N), np.dot(A_val_gpu.get_value(), A_val_copy), atol=5e-3
    )
Example #5
def shared(val):
    # Build the value as a GPU shared variable when possible; fall back to
    # a regular Aesara shared variable if the gpuarray constructor rejects it.
    try:
        return gpuarray_shared_constructor(val)
    except TypeError:
        return aesara.shared(val)
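This helper tries to place the value on the GPU and silently falls back to a regular Aesara shared variable when `gpuarray_shared_constructor` raises a `TypeError` (e.g. for values the gpuarray backend does not accept as arrays). A hedged usage sketch, reusing the assumed imports above:

# An ndarray is typically accepted by the gpuarray constructor...
w = shared(np.zeros((3, 3), dtype="float32"))
# ...while a plain Python scalar is usually rejected with a TypeError,
# so it ends up as an ordinary aesara.shared variable instead.
s = shared(0.5)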
Example #6
def test_sync_update():
    # This tests whether sync_update works; it can only be tested when a
    # GPU is present. To check that we really sync, we time a case where
    # GPU and CPU computation can run in parallel, then sync after each
    # call to disable that parallelism and assert that the synced loop
    # takes longer.

    # This import needs to go first because it binds the local 'aesara'
    # variable; you get an UnboundLocalError otherwise.
    import tests.gpuarray.config

    sizes = [100, 500, 1000, 2000, 5000, 10000, 20000, 40000]
    size = sizes[0]
    w = gpuarray_shared_constructor(
        np.random.rand(size, size).astype("float32"),
        "w",
        target=tests.gpuarray.config.test_ctx_name,
    )
    x = gpuarray_shared_constructor(
        np.random.rand(size, size).astype("float32"),
        "x",
        target=tests.gpuarray.config.test_ctx_name,
    )

    updates = [(w, w + np.asarray(0.001, "float32") * dot(x, x))]

    f = function([], updates=updates, mode=tests.gpuarray.config.mode_with_gpu)
    assert len(f.maker.fgraph.apply_nodes) == 1
    assert any(isinstance(n.op, GpuGemm) for n in f.maker.fgraph.apply_nodes)
    # Make sure libgpuarray has compiled all the kernels
    f()
    f.sync_shared()

    # Find a size for which one call takes about 0.5s; this makes the
    # test more stable across different GPUs.
    size = sizes[-1]
    for i in sizes:
        data = np.random.rand(i, i).astype("float32")
        w.set_value(data)
        x.set_value(data)
        t0 = time.time()
        f()
        f.sync_shared()
        t1 = time.time()
        if (t1 - t0) < 0.5:
            continue
        size = i
        break
    # Sync to make sure all computations are done
    f.sync_shared()

    t_0 = time.time()
    for i in range(3):
        f()
        # Sync after each call to see the slowdown from sync.
        f.sync_shared()
        time.sleep(0.5)
    t_1 = time.time()
    for i in range(3):
        f()
        time.sleep(0.5)
    # Sync to make sure all computations are finished.
    f.sync_shared()
    t_2 = time.time()
    d1 = t_1 - t_0
    d2 = t_2 - t_1
    assert d1 > d2, (d1, d2)