コード例 #1
0
def test_edge_cases(device_id):
    """
    Test several edge cases related to min/max bin, and rounding.

    Also test backend dump_hist_data functionality.

    Arguments:
        device_id: id of the GPU used both for the compute-capability check
                   and for constructing the GPU backend (presumably supplied
                   by a pytest fixture).

    Raises:
        RuntimeError: if the selected device reports a CUDA compute
                      capability below 3.0.
    """
    # Query the device the test will actually run on rather than always
    # checking device 0.
    gpuflag = (check_gpu.get_compute_capability(device_id) >= 3.0)
    if not gpuflag:
        raise RuntimeError("Device does not have CUDA compute capability 3.0 or greater")
    ng = NervanaGPU(device_id=device_id)
    nc = NervanaCPU()

    # edge case test: each input is histogrammed by the reference
    # implementation and by both backends, keyed by its tag
    np_ref = dict()
    inputs = [
        ("edges", np.array([2 ** -48, 2 ** 15], dtype=np.float32)),
        ("rounding", np.array([2 ** 5, 63.99998856, 2 ** 6, 2 ** -3, 2 ** -4,
                               0.11262291, 92.22483826], dtype=np.float32)),
        ("fp16 rounding", np.array([45.21875], dtype=np.float16))
    ]
    for tag, inp in inputs:
        np_ref[tag] = ref_hist(inp)
        for be in [ng, nc]:
            be_inp = be.array(inp)
            be_hist = be_inp.hist(tag)
            assert tensors_allclose(np_ref[tag], be_hist), tag + str(be)

    # dump_hist_data test: the histograms recorded above must be retrievable
    # from each backend through the tag -> index map it returns
    for be in [ng, nc]:
        be_hist_data, be_hist_map = be.dump_hist_data()
        for tag, _ in inputs:
            be_data = be_hist_data[be_hist_map[tag]]
            assert tensors_allclose(np_ref[tag], be_data), tag + str(be)

    del ng
    del nc
コード例 #2
0
ファイル: test_autodiff.py プロジェクト: AdityoSanjaya/neon
def test_gradients(backend_tests, custom_args):
    """
    Compare a function's value and gradient on the backend against numpy.

    The function value computed with the backend is checked against the numpy
    value, and the autodiff gradient is checked against a numerically
    estimated gradient.

    Arguments:
        backend_tests: fixture that parameterizes over cpu and gpu backends as
                       well as float16 and float32; the backend it sets up is
                       read from NervanaObject.be.
        custom_args: tuple of (test_idx, f, flag, dim) describing the function
                     under test and how to generate its input tensors.
    """
    _test_idx, f, flag, dim = custom_args

    # pull the be and dtype from the actions of the fixture
    be = NervanaObject.be
    dtype = be.default_dtype

    # tensors: index 0 holds the numpy tensors, index 1 the backend tensors
    tensors = gen_backend_tensors([np, be], [dim] * 5, [flag] * 5, dtype=dtype)

    # compare function value and gradient
    numpy_func_val = call_func(f, np, tensors[0])
    backend_func_val = call_func(f, be, tensors[1])
    numerical_gradient = get_numerical_gradient(f, tensors[0])
    ad = get_audiff_gradient(f, be, tensors[1])
    try:
        autodiff_gradient = ad.get_grad_asnumpyarray(tensors[1])

        # TODO: stricter test to fix numerical issues
        assert tensors_allclose(numpy_func_val, backend_func_val, rtol=1e-2, atol=1e-2)
        assert tensors_allclose(numerical_gradient, autodiff_gradient, rtol=1e-02, atol=1e-3)
    finally:
        # cleanup diff tree even when a comparison fails, so a failing case
        # does not skip the cleanup step
        ad.cleanup()
コード例 #3
0
def test_gradients(backend_tests, custom_args):
    """
    Check backend function values and autodiff gradients against numpy.

    Arguments:
        backend_tests: fixture that parameterizes over cpu and gpu backends as
                       well as float16 and float32; the active backend is read
                       from NervanaObject.be.
        custom_args: tuple of (test_idx, f, flag, dim) selecting the function
                     under test and the tensor generation settings.
    """
    _test_idx, f, flag, dim = custom_args

    # pull the be and dtype from the actions of the fixture
    be = NervanaObject.be
    dtype = be.default_dtype

    # tensors: tensors[0] is used with numpy, tensors[1] with the backend
    tensors = gen_backend_tensors([np, be], [dim] * 5, [flag] * 5, dtype=dtype)

    # compare function value and gradient
    numpy_func_val = call_func(f, np, tensors[0])
    backend_func_val = call_func(f, be, tensors[1])
    numerical_gradient = get_numerical_gradient(f, tensors[0])
    ad = get_audiff_gradient(f, be, tensors[1])
    try:
        autodiff_gradient = ad.get_grad_asnumpyarray(tensors[1])

        # TODO: stricter test to fix numerical issues
        assert tensors_allclose(numpy_func_val,
                                backend_func_val,
                                rtol=1e-2,
                                atol=1e-2)
        assert tensors_allclose(numerical_gradient,
                                autodiff_gradient,
                                rtol=1e-02,
                                atol=1e-3)
    finally:
        # cleanup diff tree; done in ``finally`` so that an assertion failure
        # above cannot skip it
        ad.cleanup()
コード例 #4
0
def test_cpu_randomstate():
    """
    Check that the CPU backend's random state is reproducible.

    The same sequence of binary masks must be produced by a freshly seeded
    backend, by that backend after rng_reset(), and by a second backend
    created with the same seed.
    """
    def draw_two_masks(backend, buf):
        # Fill ``buf`` with a binary mask twice, each time with a threshold
        # drawn from the backend's rng, and collect the result of each fill.
        samples = []
        for _ in range(2):
            backend.make_binary_mask(buf, keepthresh=backend.rng.rand())
            samples.append(buf.get())
        return samples

    # run 1
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    first_run = draw_two_masks(be, a)

    # run 2, using reset (same backend, same tensor)
    be.rng_reset()
    reset_run = draw_two_masks(be, a)

    del be

    # run 3, using a new backend
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    fresh_run = draw_two_masks(be, a)

    # check equality (exact match required, hence zero tolerances)
    assert tensors_allclose(first_run, reset_run, rtol=0., atol=0.)
    assert tensors_allclose(first_run, fresh_run, rtol=0., atol=0.)
    del be
コード例 #5
0
def test_slicing(fargs_tests, device_id):
    """
    Check that slicing a GPU tensor and a CPU tensor gives matching results.

    Both tensors are created from the same random numpy array. A series of
    index expressions is applied identically to each, and finally a row
    assignment is performed on both before comparing the full tensors.

    Arguments:
        fargs_tests: tuple of (dims, dtype) giving the shape and dtype of the
                     test array.
        device_id: id of the GPU device used for the GPU backend.
    """
    dims, dtype = fargs_tests

    gpu = NervanaGPU(default_dtype=dtype, device_id=device_id)
    cpu = NervanaCPU(default_dtype=dtype)

    array_np = np.random.uniform(-1, 1, dims).astype(dtype)
    array_ng = gpu.array(array_np, dtype=dtype)
    array_nc = cpu.array(array_np, dtype=dtype)

    # Each index expression is applied to BOTH tensors, so the two sides of
    # every comparison are guaranteed to use the same slice.
    index_exprs = (
        np.s_[0],
        np.s_[-1],
        np.s_[0, :],
        np.s_[0:],
        np.s_[:-1],
        np.s_[:, 0],
        np.s_[:, 0:1],
        np.s_[-1, 0:],
    )
    for idx in index_exprs:
        assert tensors_allclose(array_ng[idx],
                                array_nc[idx],
                                rtol=0,
                                atol=1e-3), "slice mismatch for index %r" % (idx,)

    # assignment through an index must also behave the same on both backends
    array_ng[0] = 0
    array_nc[0] = 0

    assert tensors_allclose(array_ng, array_nc, rtol=0, atol=1e-3)

    del gpu
    del cpu
コード例 #6
0
def test_gpu_randomstate(device_id):
    """
    Check that the GPU backend's random state is reproducible.

    A fixed interleaving of device-side and host-side random draws must give
    identical results on a freshly seeded backend, on the same backend after
    rng_reset(), and on a second backend created with the same seed.

    Arguments:
        device_id: id of the GPU device used to create the backend.
    """
    def draw_sequence(backend, buf):
        # Interleave gpu and host random draws in a fixed order and return
        # the four resulting arrays.
        buf[:] = backend.rand()  # gpu rand
        s0 = buf.get()
        s1 = backend.rng.rand(3, 3)  # host rand
        buf[:] = backend.rand()  # gpu rand
        s2 = buf.get()
        backend.make_binary_mask(buf, keepthresh=backend.rng.rand())
        s3 = buf.get()
        return [s0, s1, s2, s3]

    def check_single_live_context(backend):
        # Exactly one random-state entry must be tracked, and every tracked
        # entry must be flagged alive.
        assert len(backend.context_rand_state_map) == 1 and len(backend.context_rand_state_alive) == 1
        for ctx in backend.context_rand_state_alive:
            assert backend.context_rand_state_alive[ctx] is True

    # run 1
    be = gen_backend(backend='gpu', rng_seed=100, device_id=device_id)
    a = be.empty((3, 3))
    first_run = draw_sequence(be, a)
    check_single_live_context(be)

    # run 2, using reset
    be.rng_reset()
    reset_run = draw_sequence(be, a)
    check_single_live_context(be)

    del be

    # run 3, using a new backend
    be = gen_backend(backend='gpu', rng_seed=100, device_id=device_id)
    a = be.empty((3, 3))
    fresh_run = draw_sequence(be, a)

    # check equality (exact match required, hence zero tolerances)
    assert tensors_allclose(first_run, reset_run, rtol=0., atol=0.)
    assert tensors_allclose(first_run, fresh_run, rtol=0., atol=0.)

    del be
コード例 #7
0
def test_hist(nbin_offset_dim_dtype_inp, device_id):
    """
    Compare the nervanagpu and nervanacpu hist implementation to the reference
    implementation above.

    Parameterized test case, uses pytest_generate_test to enumerate dim_dtype_inp
    tuples that drive the test.

    Arguments:
        nbin_offset_dim_dtype_inp: tuple of ((nbins, offset), dim, dtype,
                                   (name, inp_gen)) describing the histogram
                                   configuration and the input generator.
        device_id: id of the GPU used both for the compute-capability check
                   and for constructing the GPU backend.

    Raises:
        RuntimeError: if the selected device reports a CUDA compute
                      capability below 3.0.
    """

    (nbins, offset), dim, dtype, (name, inp_gen) = nbin_offset_dim_dtype_inp

    # Query the device the test will actually run on rather than always
    # checking device 0.
    gpuflag = (check_gpu.get_compute_capability(device_id) >= 3.0)
    if not gpuflag:
        raise RuntimeError("Device does not have CUDA compute capability 3.0 or greater")

    ng = NervanaGPU(hist_bins=nbins, hist_offset=offset, device_id=device_id)
    nc = NervanaCPU(hist_bins=nbins, hist_offset=offset)

    # the reference histogram is computed once and compared to each backend
    np_inp = inp_gen(dim).astype(dtype)
    np_hist = ref_hist(np_inp, nbins=nbins, offset=offset)
    for be in [ng, nc]:
        be_inp = be.array(np_inp, dtype=dtype)
        be_hist = be_inp.hist(name)
        assert tensors_allclose(np_hist, be_hist)
    del ng
    del nc
コード例 #8
0
def test_batched_dot(device_id):
    """
    Compare batched dot results from the GPU and CPU backends against numpy.

    The same input data is run through run_batched_dot with the GPU backend,
    the CPU backend and numpy itself; the three outputs of each backend run
    must match the numpy outputs within an absolute tolerance of 1e-3.

    Arguments:
        device_id: id of the GPU device used to create the GPU backend.
    """
    # compact array printing for debugging
    print_formatters = {
        'int': lambda x: "%2d" % x,
        'float': lambda x: "%2.0f" % x
    }
    np.set_printoptions(threshold=8192 * 4,
                        linewidth=600,
                        formatter=print_formatters)

    ng = NervanaGPU(stochastic_round=False, bench=1, device_id=device_id)
    nc = NervanaCPU()

    dtype = np.float32  # np.float16 or np.float32

    X = 100  # Batch Size
    N = 32  # Minibatch Size
    C = 1536  # Input  Features
    K = 768  # Output Features

    cpuI, cpuE, cpuW = setup_test_data(X, N, C, K, dtype)

    ngO, ngB, ngU = run_batched_dot(ng, cpuI, cpuE, cpuW, X, dtype)
    ncO, ncB, ncU = run_batched_dot(nc, cpuI, cpuE, cpuW, X, dtype)
    npO, npB, npU = run_batched_dot(np, cpuI, cpuE, cpuW, X, dtype)

    # numpy is the reference; check the GPU results first, then the CPU ones
    reference = (npO, npB, npU)
    for candidate in ((ngO, ngB, ngU), (ncO, ncB, ncU)):
        for expected, actual in zip(reference, candidate):
            assert tensors_allclose(expected, actual, rtol=0, atol=1e-3)

    del ng
コード例 #9
0
def test_vs_numpy(backend_tests, custom_args):
    """
    Check that a function evaluated on the backend matches numpy.

    Arguments:
        backend_tests: fixture whose setup provides the backend read from
                       NervanaObject.be.
        custom_args: tuple of (test_idx, f, flag, dim) selecting the function
                     under test and the tensor generation settings.
    """
    _, f, flag, dim = custom_args

    # backend and its default dtype
    be = NervanaObject.be
    dtype = be.default_dtype

    # four tensors per backend: index 0 for numpy, index 1 for the backend
    n_tensors = 4
    tensors = gen_backend_tensors([np, be],
                                  [dim] * n_tensors,
                                  [flag] * n_tensors,
                                  dtype=dtype)

    # compare function values
    expected = call_func(f, np, tensors[0])
    actual = call_func(f, be, tensors[1])

    assert tensors_allclose(expected, actual, rtol=1e-2, atol=1e-2)
コード例 #10
0
def test_copy_transpose(shape_dtype_inp, device_id):
    """
    Compare copy_transpose on the GPU and CPU backends against np.transpose.

    Parameterized test case, uses pytest_generate_test to enumerate
    shape_dtype_inp tuples that drive the test.

    Arguments:
        shape_dtype_inp: tuple of (shape, dtype, (name, inp_gen)) describing
                         the input tensor and the generator that creates it.
        device_id: id of the GPU device used to create the GPU backend.
    """

    shape, dtype, (name, inp_gen) = shape_dtype_inp
    ng = NervanaGPU(default_dtype=dtype, device_id=device_id)
    nc = NervanaCPU(default_dtype=dtype)
    np_inp = inp_gen(shape).astype(dtype)
    ndims = len(shape)

    # Try the default (axes=None) plus every axis permutation except the
    # identity ordering.
    identity = tuple(range(ndims))
    axes = [None]
    axes.extend(perm for perm in itt.permutations(range(ndims), ndims)
                if perm != identity)

    for be in (ng, nc):
        for ax in axes:
            be_inp = be.array(np_inp, dtype=dtype)
            np_trans = np.transpose(np_inp, axes=ax)
            # destination tensor shaped like the numpy reference result
            be_trans = be.zeros(np_trans.shape)
            be.copy_transpose(be_inp, be_trans, axes=ax)
            assert tensors_allclose(np_trans, be_trans)
    del ng
    del nc
def pool_helper(dtype, ones, cpu, repeat, alpha, beta, ng, pool, config, op):
    """
    Run fprop/bprop pooling on the backend and optionally verify it with numpy.

    Input, error and output buffers are created on the host, copied to the
    backend ``ng`` and passed through ``ng.fprop_pool`` and ``ng.bprop_pool``.
    When ``cpu`` is truthy, a reference pooling is computed with numpy and the
    backend's fprop output and bprop deltas are asserted to match it within an
    absolute tolerance of 1e-2. When ``cpu`` is falsy nothing is verified; the
    backend kernels are only executed.

    Arguments:
        dtype: dtype used for the backend tensors.
        ones: if truthy, all host buffers are filled with ones; otherwise they
              are filled with uniform random values in [-1, 1) rounded
              through float16.
        cpu: if truthy, run the numpy reference and compare against it.
        repeat: forwarded to fprop_pool/bprop_pool.
        alpha: scale applied to the newly computed values.
        beta: scale applied to the pre-existing output/delta buffer contents.
        ng: backend object providing array, empty, fprop_pool and bprop_pool.
        pool: pooling layer description; this function reads its dimI, dimO,
              C, JTRS, DHW, op, K, N, MPQ, padding and strides attributes.
        config: only used to build the assertion error message.
        op: only used to build the assertion error message. NOTE(review): the
            reference computation rebinds ``op`` to ``pool.op``, so the value
            passed here does not select the pooling operation.
    """

    err_string = "Error in dtype: '%s' op: '%s' config: '%s'" % (str(dtype),
                                                                 op, config)

    dimI = pool.dimI
    dimO = pool.dimO

    # collapse pooling dimensions into one
    # this allows for easy cpu pooling in numpy
    # (all leading dims are multiplied together; the last dim is kept as is)
    def slicable(dim, pad=0):
        dim0 = reduce(mul, dim[:-1], 1) + pad
        return (dim0, dim[-1])

    # cpu input arrays
    # Note that we truncate these to 16 bits so that the cpu and gpu will agree
    # on an index if there is a tie.
    if ones:
        cpuI = np.ones(slicable(dimI), dtype=np.float32)
        cpuB = np.ones(slicable(dimI), dtype=np.float32)
        cpuE = np.ones(dimO, dtype=np.float32)
        cpuO = np.ones(dimO, dtype=np.float32)

    else:
        # round-trip through float16, then store as float32
        cpuI = np.random.uniform(-1.0, 1.0, slicable(dimI)).astype(
            np.float16).astype(np.float32)
        cpuB = np.random.uniform(-1.0, 1.0, slicable(dimI)).astype(
            np.float16).astype(np.float32)
        cpuE = np.random.uniform(-1.0, 1.0,
                                 dimO).astype(np.float16).astype(np.float32)
        cpuO = np.random.uniform(-1.0, 1.0,
                                 dimO).astype(np.float16).astype(np.float32)

    # reference argmax positions (filled only for the "max" op below)
    cpuA = np.empty(dimO, dtype=np.int32)

    # give gpu the input array without zero padding (not needed)
    devI = ng.array(cpuI.reshape(dimI), dtype=dtype)
    devB = ng.array(cpuB.reshape(dimI), dtype=dtype)
    devE = ng.array(cpuE, dtype=dtype)
    devO = ng.array(cpuO, dtype=dtype)
    # presumably receives the backend's argmax indices -- TODO confirm
    devA = ng.empty(dimO, dtype=np.uint8)

    ng.fprop_pool(pool,
                  devI,
                  devO,
                  devA,
                  alpha=alpha,
                  beta=beta,
                  repeat=repeat)

    ng.bprop_pool(pool,
                  devE,
                  devB,
                  devA,
                  alpha=alpha,
                  beta=beta,
                  repeat=repeat)

    # Scale the existing host buffer contents by beta up front; the reference
    # loops below then accumulate the alpha-scaled results on top.
    cpuO *= beta
    cpuB *= beta

    # Return the row indices into the collapsed (C*D*H*W, N) input that fall
    # inside the pooling window starting at offsets (kj, mt, pr, qs).
    # Positions outside the input bounds are skipped.
    def pixel_indices(kj, mt, pr, qs):

        C = pool.C
        J, T, R, S = pool.JTRS
        D, H, W = pool.DHW
        HW = H * W
        DHW = D * H * W
        idx = []

        for j in range(J):
            c = kj + j
            ci = c * DHW
            cb = c >= 0 and c < C

            for t in range(T):
                z = mt + t
                zi = ci + z * HW
                zb = cb and z >= 0 and z < D

                for r in range(R):
                    y = pr + r
                    yi = zi + y * W
                    yb = zb and y >= 0 and y < H

                    for s in range(S):
                        x = qs + s
                        if yb and x >= 0 and x < W:
                            xi = yi + x
                            idx.append(xi)
        return idx

    # numpy pooling implementation
    if cpu:

        # NOTE: this shadows the ``op`` argument with the layer's own op
        op = pool.op
        K = pool.K
        N = pool.N
        M, P, Q = pool.MPQ
        pad_j, pad_d, pad_h, pad_w = pool.padding
        str_j, str_d, str_h, str_w = pool.strides

        # visit every output position; each *_ offset is the window start
        # along that axis (stride * position - padding)
        for k in range(K):
            kj = k * str_j - pad_j

            for m in range(M):
                mt = m * str_d - pad_d

                for p in range(P):
                    pr = p * str_h - pad_h

                    for q in range(Q):
                        qs = q * str_w - pad_w

                        idx = pixel_indices(kj, mt, pr, qs)
                        # print idx
                        # exit()

                        if op == "max":

                            # set_trace()
                            cpuO[k, m, p,
                                 q, :] += np.max(cpuI[idx, :], axis=0) * alpha

                            # position of the max within the window, per
                            # column of the last axis
                            b_idx = np.argmax(cpuI[idx, :], axis=0)
                            cpuA[k, m, p, q, :] = b_idx.astype(np.int32)

                            # route each error value back to the input
                            # element that produced the max.
                            # There's probably a more elegant numpy way to do
                            # this..
                            for n in range(N):
                                cpuB[idx[b_idx[n]],
                                     n] += cpuE[k, m, p, q, n] * alpha

                        elif op == "avg":
                            cpuO[k, m, p,
                                 q, :] += np.mean(cpuI[idx, :], axis=0) * alpha

                            # spread the error evenly over the window
                            cpuB[idx, :] += cpuE[k, m, p, q, :] * \
                                (1.0/len(idx)) * alpha

                        # bprop not implemented yet
                        # NOTE(review): unlike the other ops this assigns
                        # rather than accumulates, so alpha and beta have no
                        # effect on the reference output here.
                        elif op == "l2":
                            cpuO[k, m, p, q, :] = np.sqrt(
                                np.sum(cpuI[idx, :]**2, axis=0))

        # drop zero padding
        cpuI = cpuI.reshape(dimI)
        cpuB = cpuB.reshape(dimI)

        # pull the backend results back to the host for comparison
        devA = devA.get().astype(np.int32)
        devO = devO.get().astype(np.float32)
        devB = devB.get().astype(np.float32)

        # The argmax comparison is disabled; cpuA/devA are not asserted on.
        # difA = np.absolute(cpuA - devA)

        # np.savetxt("out_cpuB.txt", cpuB.reshape((-1,pool.N))[:,0:8], fmt='%5.2f')
        # np.savetxt("out_devB.txt", devB.reshape((-1,pool.N))[:,0:8], fmt='%5.2f')

        # fprop check: report the largest absolute difference, then assert
        difO = np.absolute(cpuO - devO)
        maxD = difO.max()
        maxO = np.absolute(cpuO).max()
        print("difO max: %.6f cpuO max: %5.2f ratio: %.6f" %
              (maxD, maxO, maxD / maxO))
        assert tensors_allclose(cpuO, devO, rtol=0,
                                atol=1e-2), "fprop:" + err_string

        # bprop check: same procedure for the deltas
        difB = np.absolute(cpuB - devB)
        maxD = difB.max()
        maxB = np.absolute(cpuB).max()
        print("difB max: %.6f cpuB max: %5.2f ratio: %.6f" %
              (maxD, maxB, maxD / maxB))
        assert tensors_allclose(cpuB, devB, rtol=0,
                                atol=1e-2), "bprop:" + err_string