Esempio n. 1
0
def test_gradients(backend_tests, custom_args):
    """
    Compare backend function values and autodiff gradients against numpy.

    The value of f evaluated on the backend is checked against the numpy
    value, and the autodiff gradient is checked against a numerically
    estimated gradient.

    Arguments:
        backend_tests: fixture requested only for its side effect; per the
                       notes in this test it parameterizes over cpu and gpu
                       backends as well as float16 and float32.
        custom_args: tuple of (test_idx, f, flag, dim); test_idx is unused.
    """
    _, f, flag, dim = custom_args

    # pull the be and dtype from the actions of the fixture
    be = NervanaObject.be
    dtype = be.default_dtype

    # tensors: index 0 is handed to numpy, index 1 to the backend
    tensors = gen_backend_tensors([np, be],
                                  5, [dim] * 5, [flag] * 5,
                                  dtype=dtype)

    # compute function value and gradient
    numpy_func_val = call_func(f, np, tensors[0])
    backend_func_val = call_func(f, be, tensors[1])
    numerical_gradient = get_numerical_gradient(f, tensors[0])
    ad = get_audiff_gradient(f, be, tensors[1])

    try:
        autodiff_gradient = ad.get_grad_asnumpyarray(tensors[1])

        # TODO: stricter test to fix numerical issues
        assert_tensors_allclose(numpy_func_val,
                                backend_func_val,
                                rtol=1e-2,
                                atol=1e-2)
        assert_tensors_allclose(numerical_gradient,
                                autodiff_gradient,
                                rtol=1e-02,
                                atol=1e-3)
    finally:
        # cleanup diff tree, also when one of the comparisons above fails
        ad.cleanup()
        dtype = None
        be = None
Esempio n. 2
0
def test_cpu_randomstate():
    """
    Check that the CPU backend's random stream is reproducible.

    Two binary masks are drawn three times: on a fresh backend, after
    rng_reset(), and on a second backend created with the same seed. All
    three draws must be exactly equal.
    """
    def two_masks(backend, buf):
        # each mask uses a threshold taken from the backend's host rng
        masks = []
        for _ in range(2):
            backend.make_binary_mask(buf, keepthresh=backend.rng.rand())
            masks.append(buf.get())
        return masks

    # run 1
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    fresh = two_masks(be, a)

    # run 2, using reset
    be.rng_reset()
    after_reset = two_masks(be, a)

    del be

    # run 3, using a new backend
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    rebuilt = two_masks(be, a)

    # check equality
    assert_tensors_allclose(fresh, after_reset, rtol=0., atol=0.)
    assert_tensors_allclose(fresh, rebuilt, rtol=0., atol=0.)
    del be
Esempio n. 3
0
    def compare_helper(self, op, inA, inB, dtype):
        """
        Evaluate `op` on numpy, the GPU backend and the CPU backend through
        self.math_helper and compare the backend results with numpy.

        The numpy reference is computed in float32; for integer dtypes it is
        rounded and clipped to the dtype's range before being cast. The GPU
        result is always checked, the CPU result only for float dtypes.
        """
        expected = self.math_helper(np, op, inA, inB, dtype=np.float32)

        if np.dtype(dtype).kind in ('i', 'u'):
            # emulate saturating integer conversion
            rounded = np.around(expected)
            limits = np.iinfo(dtype)
            expected = rounded.clip(limits.min, limits.max)
        expected = expected.astype(dtype)

        gpu_out = self.math_helper(self.gpu, op, inA, inB, dtype=dtype)
        cpu_out = self.math_helper(self.cpu, op, inA, inB, dtype=dtype)

        tolerance = dict(rtol=0, atol=1e-5)
        assert_tensors_allclose(expected, gpu_out, **tolerance)

        float_types = (np.float64, np.float32, np.float16)
        if dtype in float_types:
            assert_tensors_allclose(expected, cpu_out, **tolerance)
Esempio n. 4
0
def test_edge_cases():
    """
    Exercise histogram corner cases: min/max bin edges and rounding.

    Afterwards verify that each backend's dump_hist_data returns the same
    histograms under the same tags.
    """
    capable = check_gpu.get_compute_capability(0) >= 3.0
    if capable is False:
        raise RuntimeError("Device does not have CUDA compute capability 3.0 or greater")
    gpu_be = NervanaGPU()
    cpu_be = NervanaCPU()

    # edge case test
    expected = {}
    edge_vals = np.array([2 ** -48, 2 ** 15], dtype=np.float32)
    rounding_vals = np.array([2 ** 5, 63.99998856, 2 ** 6, 2 ** -3, 2 ** -4,
                              0.11262291, 92.22483826], dtype=np.float32)
    fp16_vals = np.array([45.21875], dtype=np.float16)
    cases = [
        ("edges", edge_vals),
        ("rounding", rounding_vals),
        ("fp16 rounding", fp16_vals),
    ]
    for tag, data in cases:
        expected[tag] = ref_hist(data)
        for backend in [gpu_be, cpu_be]:
            dev_data = backend.array(data)
            hist = dev_data.hist(tag)
            assert_tensors_allclose(expected[tag], hist,
                                    err_msg=tag + str(backend))

    # dump_hist_data test
    for backend in [gpu_be, cpu_be]:
        dumped, tag_index = backend.dump_hist_data()
        for tag, _ in cases:
            stored = dumped[tag_index[tag]]
            assert_tensors_allclose(expected[tag], stored,
                                    err_msg=tag + str(backend))

    del gpu_be
    del cpu_be
Esempio n. 5
0
def test_gpu_randomstate():
    """
    Check that the GPU backend's random streams are reproducible.

    The same sequence (device rand, host rand, device rand, binary mask) is
    drawn on a fresh backend, again after rng_reset(), and once more on a new
    backend with the same seed; all three draws must be exactly equal. The
    per-context random state bookkeeping is checked along the way.
    """
    def draw(backend, buf):
        # device rand, host rand, device rand, then a binary mask
        buf[:] = backend.rand()
        dev_first = buf.get()
        host = backend.rng.rand(3, 3)
        buf[:] = backend.rand()
        dev_second = buf.get()
        backend.make_binary_mask(buf, keepthresh=backend.rng.rand())
        mask = buf.get()
        return [dev_first, host, dev_second, mask]

    def check_single_live_state(backend):
        # exactly one context is tracked and its state is marked alive
        assert len(backend.context_rand_state_map) == 1 and len(
            backend.context_rand_state_alive) == 1
        for ctx in backend.context_rand_state_alive:
            assert backend.context_rand_state_alive[ctx] is True

    # run 1
    be = gen_backend(backend='gpu', rng_seed=100)
    a = be.empty((3, 3))
    first = draw(be, a)
    check_single_live_state(be)

    # run 2, using reset
    be.rng_reset()

    for ctx in be.context_rand_state_alive:
        assert be.context_rand_state_alive[ctx] is False

    second = draw(be, a)
    check_single_live_state(be)

    del be

    # run 3, using a new backend
    be = gen_backend(backend='gpu', rng_seed=100)
    a = be.empty((3, 3))
    third = draw(be, a)

    # check equality
    assert_tensors_allclose(first, second, rtol=0., atol=0.)
    assert_tensors_allclose(first, third, rtol=0., atol=0.)

    del be
Esempio n. 6
0
def test_gradients(backend_tests, custom_args):
    """
    Compare backend function values and autodiff gradients against numpy.

    The backend value of f is checked against the numpy value, and the
    autodiff gradient is checked against a numerically estimated gradient.

    Arguments:
        backend_tests: fixture requested only for its side effect; per the
                       notes in this test it parameterizes over cpu and gpu
                       backends as well as float16 and float32.
        custom_args: tuple of (test_idx, f, flag, dim); test_idx is unused.
    """
    _, f, flag, dim = custom_args

    # pull the be and dtype from the actions of the fixture
    be = NervanaObject.be
    dtype = be.default_dtype

    # tensors: index 0 is handed to numpy, index 1 to the backend
    tensors = gen_backend_tensors([np, be], 5, [dim] * 5,
                                  [flag] * 5, dtype=dtype)

    # compute function value and gradient
    numpy_func_val = call_func(f, np, tensors[0])
    backend_func_val = call_func(f, be, tensors[1])
    numerical_gradient = get_numerical_gradient(f, tensors[0])
    ad = get_audiff_gradient(f, be, tensors[1])

    try:
        autodiff_gradient = ad.get_grad_asnumpyarray(tensors[1])

        # TODO: stricter test to fix numerical issues
        assert_tensors_allclose(numpy_func_val,
                                backend_func_val, rtol=1e-2, atol=1e-2)
        assert_tensors_allclose(numerical_gradient,
                                autodiff_gradient, rtol=1e-02, atol=1e-3)
    finally:
        # cleanup diff tree, also when one of the comparisons above fails
        ad.cleanup()
        dtype = None
        be = None
Esempio n. 7
0
def test_gradients(custom_args):
    """
    Compare backend function values and autodiff gradients against numpy.

    A backend of the requested type is instantiated for the test. Its value
    of f is checked against numpy, and its autodiff gradient is checked
    against a numerically estimated gradient.

    Arguments:
        custom_args: tuple of (test_idx, f, flag, dim, dtype, backend_type);
                     test_idx is unused.
    """
    _, f, flag, dim, dtype, backend_type = custom_args

    be = backend_type(default_dtype=dtype)

    try:
        # tensors: index 0 is handed to numpy, index 1 to the backend
        tensors = gen_backend_tensors([np, be],
                                      5, [dim] * 5, [flag] * 5,
                                      dtype=dtype)

        # compare function value and gradient
        numpy_func_val = call_func(f, np, tensors[0])
        backend_func_val = call_func(f, be, tensors[1])
        numerical_gradient = get_numerical_gradient(f, tensors[0])
        autodiff_gradient = get_audiff_gradient(f, be, tensors[1])

        # TODO: stricter test to fix numerical issues
        assert_tensors_allclose(numpy_func_val,
                                backend_func_val,
                                rtol=0.,
                                atol=1e-2)
        assert_tensors_allclose(numerical_gradient,
                                autodiff_gradient,
                                rtol=1e-02,
                                atol=1e-3)
    finally:
        # detach the GPU context even when a comparison above fails, so a
        # failing case does not leave the context attached for later tests
        if backend_type is NervanaGPU:
            be.ctx.detach()
        del be
Esempio n. 8
0
def test_edge_cases():
    """
    Exercise histogram corner cases: min/max bin edges and rounding.

    Afterwards verify that each backend's dump_hist_data returns the same
    histograms under the same tags.
    """
    gpu_be = NervanaGPU()
    cpu_be = NervanaCPU()

    # edge case test
    expected = {}
    edge_vals = np.array([2 ** -48, 2 ** 15], dtype=np.float32)
    rounding_vals = np.array([2 ** 5, 63.99998856, 2 ** 6, 2 ** -3, 2 ** -4,
                              0.11262291, 92.22483826], dtype=np.float32)
    fp16_vals = np.array([45.21875], dtype=np.float16)
    cases = [
        ("edges", edge_vals),
        ("rounding", rounding_vals),
        ("fp16 rounding", fp16_vals),
    ]
    for tag, data in cases:
        expected[tag] = ref_hist(data)
        for backend in [gpu_be, cpu_be]:
            dev_data = backend.array(data)
            hist = dev_data.hist(tag)
            assert_tensors_allclose(expected[tag], hist,
                                    err_msg=tag + str(backend))

    # dump_hist_data test
    for backend in [gpu_be, cpu_be]:
        dumped, tag_index = backend.dump_hist_data()

        for tag, _ in cases:
            stored = dumped[tag_index[tag]]
            assert_tensors_allclose(expected[tag], stored,
                                    err_msg=tag + str(backend))
Esempio n. 9
0
def test_cpu_randomstate():
    """
    Check that the CPU backend's random stream is reproducible.

    Two binary masks are drawn on a fresh backend, again after rng_reset(),
    and once more on a new backend with the same seed. All three draws must
    be exactly equal.
    """
    def two_masks(backend, buf):
        # each mask uses a threshold taken from the backend's host rng
        masks = []
        for _ in range(2):
            backend.make_binary_mask(buf, keepthresh=backend.rng.rand())
            masks.append(buf.get())
        return masks

    # run 1
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    fresh = two_masks(be, a)

    # run 2, using reset
    be.rng_reset()
    after_reset = two_masks(be, a)

    # run 3, using a new backend
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    rebuilt = two_masks(be, a)

    # check equality
    assert_tensors_allclose(fresh, after_reset, rtol=0., atol=0.)
    assert_tensors_allclose(fresh, rebuilt, rtol=0., atol=0.)
Esempio n. 10
0
def test_hist(nbin_offset_dim_dtype_inp):
    """
    Check the GPU and CPU backend hist implementations against ref_hist.

    Parameterized test case: each invocation receives one
    ((nbins, offset), dim, dtype, (name, inp_gen)) tuple.
    """
    bin_spec, dim, dtype, source = nbin_offset_dim_dtype_inp
    nbins, offset = bin_spec
    name, inp_gen = source

    capable = check_gpu.get_compute_capability(0) >= 3.0
    if capable is False:
        raise RuntimeError(
            "Device does not have CUDA compute capability 3.0 or greater")

    gpu_be = NervanaGPU(hist_bins=nbins, hist_offset=offset)
    cpu_be = NervanaCPU(hist_bins=nbins, hist_offset=offset)

    host_input = inp_gen(dim).astype(dtype)
    expected = ref_hist(host_input, nbins=nbins, offset=offset)
    for backend in [gpu_be, cpu_be]:
        dev_input = backend.array(host_input, dtype=dtype)
        observed = dev_input.hist(name)
        assert_tensors_allclose(expected, observed)
    del gpu_be
    del cpu_be
Esempio n. 11
0
def test_hist(nbin_offset_dim_dtype_inp):
    """
    Check the GPU and CPU backend hist implementations against ref_hist.

    Parameterized test case: each invocation receives one
    ((nbins, offset), dim, dtype, (name, inp_gen)) tuple.
    """
    hist_cfg, dim, dtype, generator = nbin_offset_dim_dtype_inp
    nbins, offset = hist_cfg
    name, inp_gen = generator

    has_cc3 = check_gpu.get_compute_capability(0) >= 3.0
    if has_cc3 is False:
        raise RuntimeError("Device does not have CUDA compute capability 3.0 or greater")

    backend_kwargs = dict(hist_bins=nbins, hist_offset=offset)
    gpu_be = NervanaGPU(**backend_kwargs)
    cpu_be = NervanaCPU(**backend_kwargs)

    reference_input = inp_gen(dim).astype(dtype)
    reference_hist = ref_hist(reference_input, nbins=nbins, offset=offset)
    for backend in [gpu_be, cpu_be]:
        uploaded = backend.array(reference_input, dtype=dtype)
        backend_hist = uploaded.hist(name)
        assert_tensors_allclose(reference_hist, backend_hist)
    del gpu_be
    del cpu_be
Esempio n. 12
0
def test_gpu_randomstate():
    """
    Check that the GPU backend's random streams are reproducible.

    The same sequence (device rand, host rand, device rand, binary mask) is
    drawn on a fresh backend, again after rng_reset(), and once more on a new
    backend with the same seed; all three draws must be exactly equal. The
    per-context random state bookkeeping is checked along the way.
    """
    def sample_sequence(backend, tensor):
        # device rand, host rand, device rand, then a binary mask
        samples = []
        tensor[:] = backend.rand()
        samples.append(tensor.get())
        samples.append(backend.rng.rand(3, 3))
        tensor[:] = backend.rand()
        samples.append(tensor.get())
        backend.make_binary_mask(tensor, keepthresh=backend.rng.rand())
        samples.append(tensor.get())
        return samples

    def expect_one_alive_state(backend):
        # exactly one context is tracked and its state is marked alive
        assert len(backend.context_rand_state_map) == 1 and len(backend.context_rand_state_alive) == 1
        for ctx in backend.context_rand_state_alive:
            assert backend.context_rand_state_alive[ctx] is True

    # run 1
    be = gen_backend(backend='gpu', rng_seed=100)
    a = be.empty((3, 3))
    run_fresh = sample_sequence(be, a)
    expect_one_alive_state(be)

    # run 2, using reset
    be.rng_reset()

    for ctx in be.context_rand_state_alive:
        assert be.context_rand_state_alive[ctx] is False

    run_reset = sample_sequence(be, a)
    expect_one_alive_state(be)

    del be

    # run 3, using a new backend
    be = gen_backend(backend='gpu', rng_seed=100)
    a = be.empty((3, 3))
    run_rebuilt = sample_sequence(be, a)

    # check equality
    assert_tensors_allclose(run_fresh, run_reset, rtol=0., atol=0.)
    assert_tensors_allclose(run_fresh, run_rebuilt, rtol=0., atol=0.)

    del be
Esempio n. 13
0
def test_vs_numpy(custom_args):
    """
    Evaluate f with numpy and with a generated backend and require the two
    values to agree within rtol=1e-2 / atol=1e-2.

    custom_args is (test_idx, f, flag, dim, dtype, backend_type).
    """
    _idx, func, flag, dim, dtype, backend_type = custom_args

    # backend
    backend = gen_backend(backend_type, default_dtype=dtype)

    # tensors: index 0 is handed to numpy, index 1 to the backend
    n_tensors = 5
    generated = gen_backend_tensors([np, backend], n_tensors,
                                    [dim] * n_tensors, [flag] * n_tensors,
                                    dtype=dtype)
    host_tensors = generated[0]
    device_tensors = generated[1]

    # compare function values
    expected = call_func(func, np, host_tensors)
    actual = call_func(func, backend, device_tensors)

    tolerance = dict(rtol=1e-2, atol=1e-2)
    assert_tensors_allclose(expected, actual, **tolerance)
Esempio n. 14
0
def test_vs_numpy(backend_tests, custom_args):
    """
    Evaluate f with numpy and with the fixture-configured backend and
    require the two values to agree within rtol=1e-2 / atol=1e-2.

    custom_args is (test_idx, f, flag, dim); four input tensors are used.
    """
    _idx, func, flag, dim = custom_args

    # backend
    backend = NervanaObject.be
    default_dtype = backend.default_dtype

    # tensors: index 0 is handed to numpy, index 1 to the backend
    n_tensors = 4
    generated = gen_backend_tensors([np, backend], n_tensors,
                                    [dim] * n_tensors, [flag] * n_tensors,
                                    dtype=default_dtype)
    host_tensors = generated[0]
    device_tensors = generated[1]

    # compare function values
    expected = call_func(func, np, host_tensors)
    actual = call_func(func, backend, device_tensors)

    tolerance = dict(rtol=1e-2, atol=1e-2)
    assert_tensors_allclose(expected, actual, **tolerance)
def test_vs_numpy(backend_tests, custom_args):
    """
    Evaluate f with numpy and with the fixture-configured backend and
    require the two values to agree within rtol=1e-2 / atol=1e-2.

    custom_args is (test_idx, f, flag, dim); five input tensors are used.
    """
    _idx, func, flag, dim = custom_args

    # backend
    backend = NervanaObject.be
    default_dtype = backend.default_dtype

    # tensors: index 0 is handed to numpy, index 1 to the backend
    n_tensors = 5
    dims = [dim] * n_tensors
    flags = [flag] * n_tensors
    generated = gen_backend_tensors([np, backend], n_tensors, dims, flags,
                                    dtype=default_dtype)
    host_tensors = generated[0]
    device_tensors = generated[1]

    # compare function values
    expected = call_func(func, np, host_tensors)
    actual = call_func(func, backend, device_tensors)

    assert_tensors_allclose(expected, actual, rtol=1e-2, atol=1e-2)
Esempio n. 16
0
def test_gradients(custom_args):
    """
    Compare backend function values and autodiff gradients against numpy.

    The backend value of f is checked against the numpy value, and the
    autodiff gradient against a numerically estimated gradient.

    custom_args is (test_idx, f, flag, dim, dtype, backend_type).
    """
    _idx, func, flag, dim, dtype, backend_type = custom_args
    backend = gen_backend(backend_type, default_dtype=dtype)

    # tensors: index 0 is handed to numpy, index 1 to the backend
    n_tensors = 5
    generated = gen_backend_tensors([np, backend], n_tensors,
                                    [dim] * n_tensors, [flag] * n_tensors,
                                    dtype=dtype)
    host_tensors = generated[0]
    device_tensors = generated[1]

    # function values first, then gradients
    value_np = call_func(func, np, host_tensors)
    value_be = call_func(func, backend, device_tensors)
    grad_numeric = get_numerical_gradient(func, host_tensors)
    grad_autodiff = get_audiff_gradient(func, backend, device_tensors)

    # TODO: stricter test to fix numerical issues
    assert_tensors_allclose(value_np, value_be, rtol=1e-2, atol=1e-2)
    assert_tensors_allclose(grad_numeric, grad_autodiff,
                            rtol=1e-02, atol=1e-3)
Esempio n. 17
0
    def compare_helper(self, op, inA, inB, dtype):
        """
        Evaluate `op` on numpy, the GPU backend and the CPU backend through
        self.math_helper and compare the backend results with numpy.

        The numpy reference is computed in float32; for integer dtypes it is
        rounded and clipped to the dtype's range before being cast. The GPU
        result is always checked, the CPU result only for float dtypes.
        """
        reference = self.math_helper(np, op, inA, inB, dtype=np.float32)

        is_integer = np.dtype(dtype).kind in ('i', 'u')
        if is_integer:
            # emulate saturating integer conversion
            reference = np.around(reference)
            bounds = np.iinfo(dtype)
            reference = reference.clip(bounds.min, bounds.max)
        reference = reference.astype(dtype)

        results = {}
        for label, backend in (("gpu", self.gpu), ("cpu", self.cpu)):
            results[label] = self.math_helper(
                backend, op, inA, inB, dtype=dtype)

        assert_tensors_allclose(reference, results["gpu"],
                                rtol=0, atol=1e-5)

        if dtype in (np.float64, np.float32, np.float16):
            assert_tensors_allclose(reference, results["cpu"],
                                    rtol=0, atol=1e-5)
Esempio n. 18
0
def test_edge_cases():
    """
    Exercise histogram corner cases: min/max bin edges and rounding.

    Afterwards verify that each backend's dump_hist_data returns the same
    histograms under the same tags.
    """
    has_cc3 = check_gpu.get_compute_capability(0) >= 3.0
    if has_cc3 is False:
        raise RuntimeError(
            "Device does not have CUDA compute capability 3.0 or greater")
    gpu_be = NervanaGPU()
    cpu_be = NervanaCPU()

    # edge case test
    reference = {}
    samples = [
        ("edges", np.array([2**-48, 2**15], dtype=np.float32)),
        ("rounding", np.array([2**5, 63.99998856, 2**6, 2**-3, 2**-4,
                               0.11262291, 92.22483826], dtype=np.float32)),
        ("fp16 rounding", np.array([45.21875], dtype=np.float16)),
    ]
    for tag, values in samples:
        reference[tag] = ref_hist(values)
        for backend in [gpu_be, cpu_be]:
            uploaded = backend.array(values)
            backend_hist = uploaded.hist(tag)
            assert_tensors_allclose(reference[tag], backend_hist,
                                    err_msg=tag + str(backend))

    # dump_hist_data test
    for backend in [gpu_be, cpu_be]:
        hist_data, hist_map = backend.dump_hist_data()
        for tag, _ in samples:
            stored = hist_data[hist_map[tag]]
            assert_tensors_allclose(reference[tag], stored,
                                    err_msg=tag + str(backend))

    del gpu_be
    del cpu_be
Esempio n. 19
0
def test_hist(nbin_offset_dim_dtype_inp):
    """
    Check the GPU and CPU backend hist implementations against ref_hist.

    Parameterized test case: each invocation receives one
    ((nbins, offset), dim, dtype, (name, inp_gen)) tuple.
    """
    bin_spec, dim, dtype, source = nbin_offset_dim_dtype_inp
    nbins, offset = bin_spec
    name, inp_gen = source

    gpu_be = NervanaGPU(hist_bins=nbins, hist_offset=offset)
    cpu_be = NervanaCPU(hist_bins=nbins, hist_offset=offset)

    host_input = inp_gen(dim).astype(dtype)
    expected = ref_hist(host_input, nbins=nbins, offset=offset)
    for backend in [gpu_be, cpu_be]:
        dev_input = backend.array(host_input, dtype=dtype)
        observed = dev_input.hist(name)
        assert_tensors_allclose(expected, observed)
Esempio n. 20
0
def test_hist(nbin_offset_dim_dtype_inp):
    """
    Check the GPU and CPU backend hist implementations against ref_hist.

    Parameterized test case: each invocation receives one
    ((nbins, offset), dim, dtype, (name, inp_gen)) tuple.
    """
    hist_cfg, dim, dtype, generator = nbin_offset_dim_dtype_inp
    nbins, offset = hist_cfg
    name, inp_gen = generator

    backend_kwargs = dict(hist_bins=nbins, hist_offset=offset)
    gpu_be = NervanaGPU(**backend_kwargs)
    cpu_be = NervanaCPU(**backend_kwargs)

    reference_input = inp_gen(dim).astype(dtype)
    reference_hist = ref_hist(reference_input, nbins=nbins, offset=offset)
    for backend in [gpu_be, cpu_be]:
        uploaded = backend.array(reference_input, dtype=dtype)
        backend_hist = uploaded.hist(name)
        assert_tensors_allclose(reference_hist, backend_hist)
Esempio n. 21
0
def test_copy_transpose(shape_dtype_inp):
    """
    Check copy_transpose on the GPU and CPU backends against np.transpose.

    Parameterized test case: each invocation receives one
    (shape, dtype, (name, inp_gen)) tuple. Every axis permutation except the
    identity is exercised, plus axes=None.
    """
    shape, dtype, source = shape_dtype_inp
    name, inp_gen = source
    gpu_be = NervanaGPU(default_dtype=dtype)
    cpu_be = NervanaCPU(default_dtype=dtype)
    host_input = inp_gen(shape).astype(dtype)
    ndims = len(shape)

    # None first, then all permutations with the identity ordering dropped
    axis_orders = [None]
    axis_orders.extend(itt.permutations(range(ndims), ndims))
    axis_orders.remove(tuple(range(ndims)))

    for backend in [gpu_be, cpu_be]:
        for order in axis_orders:
            dev_input = backend.array(host_input, dtype=dtype)
            expected = np.transpose(host_input, axes=order)
            dev_output = backend.zeros(expected.shape)
            backend.copy_transpose(dev_input, dev_output, axes=order)
            assert_tensors_allclose(expected, dev_output)
    del gpu_be
    del cpu_be
Esempio n. 22
0
def test_copy_transpose(shape_dtype_inp):
    """
    Check copy_transpose on the GPU and CPU backends against np.transpose.

    Parameterized test case: each invocation receives one
    (shape, dtype, (name, inp_gen)) tuple. Every axis permutation except the
    identity is exercised, plus axes=None.
    """
    shape, dtype, generator = shape_dtype_inp
    name, inp_gen = generator
    gpu_be = NervanaGPU(default_dtype=dtype)
    cpu_be = NervanaCPU(default_dtype=dtype)
    reference_input = inp_gen(shape).astype(dtype)
    rank = len(shape)

    # None first, then all permutations with the identity ordering dropped
    identity = tuple(range(rank))
    candidates = [None] + list(itt.permutations(range(rank), rank))
    candidates.remove(identity)

    for backend, axes in itt.product([gpu_be, cpu_be], candidates):
        uploaded = backend.array(reference_input, dtype=dtype)
        reference_out = np.transpose(reference_input, axes=axes)
        backend_out = backend.zeros(reference_out.shape)
        backend.copy_transpose(uploaded, backend_out, axes=axes)
        assert_tensors_allclose(reference_out, backend_out)
    del gpu_be
    del cpu_be
Esempio n. 23
0
def test_edge_cases():
    """
    Exercise histogram corner cases: min/max bin edges and rounding.

    Afterwards verify that each backend's dump_hist_data returns the same
    histograms under the same tags.
    """
    gpu_be = NervanaGPU()
    cpu_be = NervanaCPU()

    # edge case test
    reference = {}
    samples = [
        ("edges", np.array([2**-48, 2**15], dtype=np.float32)),
        ("rounding", np.array([2**5, 63.99998856, 2**6, 2**-3, 2**-4,
                               0.11262291, 92.22483826], dtype=np.float32)),
        ("fp16 rounding", np.array([45.21875], dtype=np.float16)),
    ]
    for tag, values in samples:
        reference[tag] = ref_hist(values)
        for backend in [gpu_be, cpu_be]:
            uploaded = backend.array(values)
            backend_hist = uploaded.hist(tag)
            assert_tensors_allclose(reference[tag], backend_hist,
                                    err_msg=tag + str(backend))

    # dump_hist_data test
    for backend in [gpu_be, cpu_be]:
        hist_data, hist_map = backend.dump_hist_data()

        for tag, _ in samples:
            stored = hist_data[hist_map[tag]]
            assert_tensors_allclose(reference[tag], stored,
                                    err_msg=tag + str(backend))
Esempio n. 24
0
def pool_helper(dtype, ones, cpu, repeat, alpha, beta, ng, pool, config, op):
    """
    Run forward and backward pooling on the `ng` backend and, when `cpu` is
    truthy, compare the results against a numpy reference implementation.

    Arguments:
        dtype: dtype used for the device copies of the input/output/error
               arrays.
        ones: if truthy the host arrays are filled with ones, otherwise with
              uniform random values in [-1, 1) rounded through float16.
        cpu: if truthy, compute the numpy reference and assert that the
             device fprop/bprop outputs match it (atol=1e-2).
        repeat: forwarded to ng.fprop_pool and ng.bprop_pool.
        alpha: forwarded to the backend calls; also scales each contribution
               in the numpy reference.
        beta: forwarded to the backend calls; also pre-scales the reference
              output and delta arrays.
        ng: backend object providing array, empty, fprop_pool and bprop_pool.
        pool: pooling descriptor; this function reads dimI, dimO, C, K, N,
              JTRS, DHW, MPQ, padding, strides and op from it.
        config: only used in the assertion error message.
        op: only used in the assertion error message; the numpy reference
            uses pool.op instead (see the note where it is rebound below).
    """

    err_string = "Error in dtype: '%s' op: '%s' config: '%s'" % (str(dtype),
                                                                 op, config)

    dimI = pool.dimI
    dimO = pool.dimO

    # collapse pooling dimensions into one
    # this allows for easy cpu pooling in numpy
    # (returns a 2-tuple: product of all but the last dim, plus pad, and the
    # last dim unchanged)
    def slicable(dim, pad=0):
        dim0 = reduce(mul, dim[:-1], 1) + pad
        return (dim0, dim[-1])

    # cpu input arrays
    # Note that we truncate these to 16 bits so that the cpu and gpu will
    # agree on an index if there is a tie.
    if ones:
        cpuI = np.ones(slicable(dimI), dtype=np.float32)
        cpuB = np.ones(slicable(dimI), dtype=np.float32)
        cpuE = np.ones(dimO, dtype=np.float32)
        cpuO = np.ones(dimO, dtype=np.float32)

    else:
        cpuI = np.random.uniform(-1.0, 1.0,
                                 slicable(dimI)).astype(np.float16).astype(
                                     np.float32)  #.astype(np.float16)
        cpuB = np.random.uniform(-1.0, 1.0, slicable(dimI)).astype(
            np.float16).astype(np.float32)
        cpuE = np.random.uniform(-1.0, 1.0,
                                 dimO).astype(np.float16).astype(np.float32)
        cpuO = np.random.uniform(-1.0, 1.0,
                                 dimO).astype(np.float16).astype(np.float32)

    # reference argmax buffer; np.empty leaves it uninitialized and only the
    # "max" branch below writes to it
    cpuA = np.empty(dimO, dtype=np.int32)

    # give gpu the input array without zero padding (not needed)
    devI = ng.array(cpuI.reshape(dimI), dtype=dtype)
    devB = ng.array(cpuB.reshape(dimI), dtype=dtype)
    devE = ng.array(cpuE, dtype=dtype)
    devO = ng.array(cpuO, dtype=dtype)
    devA = ng.empty(dimO, dtype=np.uint8)

    ng.fprop_pool(pool,
                  devI,
                  devO,
                  devA,
                  alpha=alpha,
                  beta=beta,
                  repeat=repeat)

    ng.bprop_pool(pool,
                  devE,
                  devB,
                  devA,
                  alpha=alpha,
                  beta=beta,
                  repeat=repeat)

    # pre-scale the reference accumulators by beta; the alpha-scaled
    # contributions are added on top in the loops below
    cpuO *= beta
    cpuB *= beta

    # Return the row indices into the collapsed input (first axis of cpuI)
    # of every in-bounds position covered by the pooling window whose origin
    # is (kj, mt, pr, qs). Out-of-bounds positions are skipped.
    def pixel_indices(kj, mt, pr, qs):

        C = pool.C
        J, T, R, S = pool.JTRS
        D, H, W = pool.DHW
        HW = H * W
        DHW = D * H * W
        idx = []

        for j in range(J):
            c = kj + j
            ci = c * DHW
            cb = c >= 0 and c < C

            for t in range(T):
                z = mt + t
                zi = ci + z * HW
                zb = cb and z >= 0 and z < D

                for r in range(R):
                    y = pr + r
                    yi = zi + y * W
                    yb = zb and y >= 0 and y < H

                    for s in range(S):
                        x = qs + s
                        if yb and x >= 0 and x < W:
                            xi = yi + x
                            idx.append(xi)
        return idx

    # numpy pooling implementation
    if cpu:

        # NOTE(review): this rebinds the `op` parameter; from here on the
        # reference follows pool.op, while err_string above was built from
        # the argument. Confirm the two are always meant to be the same.
        op = pool.op
        C = pool.C
        K = pool.K
        N = pool.N
        M, P, Q = pool.MPQ
        pad_j, pad_d, pad_h, pad_w = pool.padding
        str_j, str_d, str_h, str_w = pool.strides

        for k in range(K):
            kj = k * str_j - pad_j

            for m in range(M):
                mt = m * str_d - pad_d

                for p in range(P):
                    pr = p * str_h - pad_h

                    for q in range(Q):
                        qs = q * str_w - pad_w

                        idx = pixel_indices(kj, mt, pr, qs)
                        # print idx
                        # exit()

                        if op == "max":

                            #set_trace()
                            cpuO[k, m, p,
                                 q, :] += np.max(cpuI[idx, :], axis=0) * alpha

                            # position of the max within the window, per
                            # column of the last axis
                            b_idx = np.argmax(cpuI[idx, :], axis=0)
                            cpuA[k, m, p, q, :] = b_idx.astype(np.int32)

                            # There's probably a more elegant numpy way to do this..
                            # route each error value back to the input row
                            # that produced the max
                            for n in range(N):
                                cpuB[idx[b_idx[n]],
                                     n] += cpuE[k, m, p, q, n] * alpha

                        elif op == "avg":
                            cpuO[k, m, p,
                                 q, :] += np.mean(cpuI[idx, :], axis=0) * alpha

                            # spread the error evenly over the window rows
                            cpuB[idx, :] += cpuE[k, m, p, q, :] * (
                                1.0 / len(idx)) * alpha

                        # bprop not implemented yet
                        # (this branch also overwrites cpuO instead of
                        # accumulating and does not apply alpha)
                        elif op == "l2":
                            cpuO[k, m, p, q, :] = np.sqrt(
                                np.sum(cpuI[idx, :]**2, axis=0))

        # drop zero padding
        cpuI = cpuI.reshape(dimI)
        cpuB = cpuB.reshape(dimI)

        # pull the device results back to host arrays for comparison
        devA = devA.get().astype(np.int32)
        devO = devO.get().astype(np.float32)
        devB = devB.get().astype(np.float32)

        # NOTE(review): difA is computed but not used or asserted anywhere
        # in this function.
        difA = np.absolute(cpuA - devA)

        # np.savetxt("out_cpuB.txt", cpuB.reshape((-1,pool.N))[:,0:8], fmt='%5.2f')
        # np.savetxt("out_devB.txt", devB.reshape((-1,pool.N))[:,0:8], fmt='%5.2f')

        # forward pass: report the largest deviation, then assert
        difO = np.absolute(cpuO - devO)
        maxD = difO.max()
        maxO = np.absolute(cpuO).max()
        print("difO max: %.6f cpuO max: %5.2f ratio: %.6f" %
              (maxD, maxO, maxD / maxO))
        assert_tensors_allclose(cpuO,
                                devO,
                                rtol=0,
                                atol=1e-2,
                                err_msg="fprop:" + err_string)

        # backward pass: report the largest deviation, then assert
        difB = np.absolute(cpuB - devB)
        maxD = difB.max()
        maxB = np.absolute(cpuB).max()
        print("difB max: %.6f cpuB max: %5.2f ratio: %.6f" %
              (maxD, maxB, maxD / maxB))
        assert_tensors_allclose(cpuB,
                                devB,
                                rtol=0,
                                atol=1e-2,
                                err_msg="bprop:" + err_string)
Esempio n. 25
0
def test_batched_dot():
    """
    Compare batched dot results from the GPU and CPU backends with numpy.

    run_batched_dot is executed with the GPU backend, the CPU backend and
    numpy on the same generated inputs. Each of its three outputs from the
    GPU and CPU runs must match the numpy output within atol=1e-3.
    """
    # The wide print formatting below is global numpy state; remember the
    # current options so they can be restored and do not leak into other
    # tests.
    saved_printoptions = np.get_printoptions()
    np.set_printoptions(threshold=8192 * 4, linewidth=600,
                        formatter={'int': lambda x: "%2d" % x, 'float': lambda x: "%2.0f" % x})

    try:
        ng = NervanaGPU(stochastic_round=False, bench=1)
        try:
            nc = NervanaCPU()

            dtype = np.float32  # np.float16 or np.float32

            X = 100   # Batch Size
            N = 32   # Minibatch Size
            C = 1536  # Input  Features
            K = 768  # Output Features

            cpuI, cpuE, cpuW = setup_test_data(X, N, C, K, dtype)

            ngO, ngB, ngU = run_batched_dot(ng, cpuI, cpuE, cpuW, X, dtype)
            ncO, ncB, ncU = run_batched_dot(nc, cpuI, cpuE, cpuW, X, dtype)
            npO, npB, npU = run_batched_dot(np, cpuI, cpuE, cpuW, X, dtype)

            # GPU vs numpy
            assert_tensors_allclose(npO, ngO, rtol=0, atol=1e-3)
            assert_tensors_allclose(npB, ngB, rtol=0, atol=1e-3)
            assert_tensors_allclose(npU, ngU, rtol=0, atol=1e-3)

            # CPU vs numpy
            assert_tensors_allclose(npO, ncO, rtol=0, atol=1e-3)
            assert_tensors_allclose(npB, ncB, rtol=0, atol=1e-3)
            assert_tensors_allclose(npU, ncU, rtol=0, atol=1e-3)
        finally:
            # detach the GPU context even when a comparison above fails
            ng.ctx.detach()
            del ng
    finally:
        np.set_printoptions(**saved_printoptions)
Esempio n. 26
0
def pool_helper(dtype, ones, cpu, repeat, alpha, beta, ng, pool, config, op):
    """Run pooling fprop/bprop on ``ng`` and check it against a numpy reference.

    Host arrays for the input (I), output (O), error (E) and input-gradient
    (B) tensors are created, copied to the backend, and passed through
    ``ng.fprop_pool`` and ``ng.bprop_pool``.  When ``cpu`` is truthy the same
    computation is repeated with explicit numpy loops and both the forward
    output and the backward result are compared with ``atol=1e-2``.

    Arguments:
        dtype: element type handed to ``ng.array`` for the device tensors;
            also shown in the failure message.
        ones: if truthy the host arrays are filled with ones, otherwise with
            uniform random values in [-1, 1) rounded through float16.
        cpu: if truthy the numpy reference is computed and compared; if
            falsy only the backend calls run and nothing is asserted.
        repeat: forwarded unchanged to ``ng.fprop_pool`` / ``ng.bprop_pool``.
        alpha: forwarded to the backend; the reference scales each newly
            pooled value by it.
        beta: forwarded to the backend; the reference scales the previous
            contents of the output / gradient arrays by it.
        ng: backend object; ``array``, ``empty``, ``fprop_pool`` and
            ``bprop_pool`` are called on it.
        pool: pooling layer description; the attributes ``dimI``, ``dimO``,
            ``C``, ``JTRS``, ``DHW``, ``op``, ``K``, ``N``, ``MPQ``,
            ``padding`` and ``strides`` are read.
        config: only used in the failure message.
        op: only used in the failure message; the reference implementation
            dispatches on ``pool.op`` instead.

    Returns:
        None.  Failures surface through ``assert_tensors_allclose``.
    """

    err_string = "Error in dtype: '%s' op: '%s' config: '%s'" % (
        str(dtype), op, config)

    dimI = pool.dimI
    dimO = pool.dimO

    # collapse pooling dimensions into one
    # this allows for easy cpu pooling in numpy
    # (all axes but the last are multiplied together; `pad` is added to that
    # collapsed extent, but no caller in this function passes it)
    def slicable(dim, pad=0):
        dim0 = reduce(mul, dim[:-1], 1) + pad
        return (dim0, dim[-1])

    # cpu input arrays
    # Note that we truncate these to 16 bits so that the cpu and gpu will agree
    # on an index if there is a tie.
    if ones:
        cpuI = np.ones(slicable(dimI), dtype=np.float32)
        cpuB = np.ones(slicable(dimI), dtype=np.float32)
        cpuE = np.ones(dimO, dtype=np.float32)
        cpuO = np.ones(dimO, dtype=np.float32)

    else:
        # .astype(np.float16)
        cpuI = np.random.uniform(-1.0, 1.0, slicable(dimI)
                                 ).astype(np.float16).astype(np.float32)
        cpuB = np.random.uniform(-1.0, 1.0, slicable(dimI)
                                 ).astype(np.float16).astype(np.float32)
        cpuE = np.random.uniform(-1.0, 1.0,
                                 dimO).astype(np.float16).astype(np.float32)
        cpuO = np.random.uniform(-1.0, 1.0,
                                 dimO).astype(np.float16).astype(np.float32)

    # argmax buffer for the reference; filled for "max" but never compared
    # (the difA check further down is commented out)
    cpuA = np.empty(dimO, dtype=np.int32)

    # give gpu the input array without zero padding (not needed)
    # The device copies are taken BEFORE the host arrays are scaled by beta
    # below, so the backend starts from the same unscaled values.
    devI = ng.array(cpuI.reshape(dimI), dtype=dtype)
    devB = ng.array(cpuB.reshape(dimI), dtype=dtype)
    devE = ng.array(cpuE, dtype=dtype)
    devO = ng.array(cpuO, dtype=dtype)
    devA = ng.empty(dimO, dtype=np.uint8)

    ng.fprop_pool(
        pool, devI, devO, devA, alpha=alpha, beta=beta, repeat=repeat)

    ng.bprop_pool(
        pool, devE, devB, devA, alpha=alpha, beta=beta, repeat=repeat)

    # reference side of "result = alpha * pooled + beta * previous":
    # apply the beta term now, the alpha term is accumulated in the loops
    cpuO *= beta
    cpuB *= beta

    def pixel_indices(kj, mt, pr, qs):
        """Return the row indices into the collapsed input for one window.

        (kj, mt, pr, qs) is the window origin along the four pooled axes.
        Positions that fall outside [0, C), [0, D), [0, H) or [0, W) are
        left out, so the list can be shorter than J*T*R*S.
        """

        C = pool.C
        J, T, R, S = pool.JTRS
        D, H, W = pool.DHW
        HW = H*W
        DHW = D*H*W
        idx = []

        for j in range(J):
            c = kj + j
            ci = c*DHW
            cb = c >= 0 and c < C

            for t in range(T):
                z = mt + t
                zi = ci + z*HW
                zb = cb and z >= 0 and z < D

                for r in range(R):
                    y = pr + r
                    yi = zi + y*W
                    yb = zb and y >= 0 and y < H

                    for s in range(S):
                        x = qs + s
                        if yb and x >= 0 and x < W:
                            xi = yi + x
                            idx.append(xi)
        return idx

    # numpy pooling implementation
    if cpu:

        # NOTE: rebinds the `op` parameter; err_string above was already
        # built from the value that was passed in.
        op = pool.op
        K = pool.K
        N = pool.N
        M, P, Q = pool.MPQ
        pad_j, pad_d, pad_h, pad_w = pool.padding
        str_j, str_d, str_h, str_w = pool.strides

        # visit every output position; each loop turns an output coordinate
        # into the (possibly negative) origin of its input window
        for k in range(K):
            kj = k*str_j - pad_j

            for m in range(M):
                mt = m*str_d - pad_d

                for p in range(P):
                    pr = p*str_h - pad_h

                    for q in range(Q):
                        qs = q*str_w - pad_w

                        idx = pixel_indices(kj, mt, pr, qs)
                        # print idx
                        # exit()

                        if op == "max":

                            # set_trace()
                            cpuO[
                                k, m, p, q, :] += np.max(cpuI[idx, :], axis=0) * alpha

                            # position of the maximum within `idx`, one per
                            # entry of the last axis
                            b_idx = np.argmax(cpuI[idx, :], axis=0)
                            cpuA[k, m, p, q, :] = b_idx.astype(np.int32)

                            # There's probably a more elegant numpy way to do
                            # this..
                            # route each error value to the input element
                            # that produced the maximum
                            for n in range(N):
                                cpuB[
                                    idx[b_idx[n]], n] += cpuE[k, m, p, q, n] * alpha

                        elif op == "avg":
                            # the mean (and the 1/len(idx) factor below) is
                            # taken over in-bounds elements only
                            cpuO[
                                k, m, p, q, :] += np.mean(cpuI[idx, :], axis=0) * alpha

                            cpuB[idx, :] += cpuE[k, m, p, q, :] * \
                                (1.0/len(idx)) * alpha

                        # bprop not implemented yet
                        # NOTE(review): this branch overwrites cpuO and
                        # applies neither alpha nor beta, and cpuB only
                        # carries the beta scaling; the comparisons below
                        # still run for "l2" -- confirm that is intended.
                        elif op == "l2":
                            cpuO[k, m, p, q, :] = np.sqrt(
                                np.sum(cpuI[idx, :]**2, axis=0))

        # drop zero padding
        cpuI = cpuI.reshape(dimI)
        cpuB = cpuB.reshape(dimI)

        # pull the backend results back to the host as numpy arrays
        devA = devA.get().astype(np.int32)
        devO = devO.get().astype(np.float32)
        devB = devB.get().astype(np.float32)

        # difA = np.absolute(cpuA - devA)

        # np.savetxt("out_cpuB.txt", cpuB.reshape((-1,pool.N))[:,0:8], fmt='%5.2f')
        # np.savetxt("out_devB.txt", devB.reshape((-1,pool.N))[:,0:8], fmt='%5.2f')

        # forward pass: report the largest deviation, then assert
        difO = np.absolute(cpuO - devO)
        maxD = difO.max()
        maxO = np.absolute(cpuO).max()
        print("difO max: %.6f cpuO max: %5.2f ratio: %.6f" %
              (maxD, maxO, maxD / maxO))
        assert_tensors_allclose(
            cpuO, devO, rtol=0, atol=1e-2, err_msg="fprop:" + err_string)

        # backward pass: same report and check for the input gradient
        difB = np.absolute(cpuB - devB)
        maxD = difB.max()
        maxB = np.absolute(cpuB).max()
        print("difB max: %.6f cpuB max: %5.2f ratio: %.6f" %
              (maxD, maxB, maxD / maxB))
        assert_tensors_allclose(
            cpuB, devB, rtol=0, atol=1e-2, err_msg="bprop:" + err_string)
Esempio n. 27
0
def test_slicing(fargs_tests):
    """Check that GPU and CPU backend tensors agree under indexing.

    A random array of shape ``dims`` and type ``dtype`` (both taken from the
    ``fargs_tests`` fixture) is copied to a ``NervanaGPU`` tensor and a
    ``NervanaCPU`` tensor.  A series of integer and slice indices is applied
    to both and the results must match within ``atol=1e-3``; the first row
    is then overwritten with zero on both tensors and the full tensors are
    compared again.

    Every comparison applies the same index expression to both tensors.
    """
    dims, dtype = fargs_tests

    gpu = NervanaGPU(default_dtype=dtype)
    cpu = NervanaCPU(default_dtype=dtype)

    array_np = np.random.uniform(-1, 1, dims).astype(dtype)
    array_ng = gpu.array(array_np, dtype=dtype)
    array_nc = cpu.array(array_np, dtype=dtype)

    # read access through integer indices and slices
    assert_tensors_allclose(array_ng[0], array_nc[0], rtol=0, atol=1e-3)
    assert_tensors_allclose(array_ng[-1], array_nc[-1], rtol=0, atol=1e-3)
    assert_tensors_allclose(array_ng[0, :], array_nc[0, :], rtol=0, atol=1e-3)
    assert_tensors_allclose(array_ng[0:], array_nc[0:], rtol=0, atol=1e-3)
    assert_tensors_allclose(array_ng[:-1], array_nc[:-1], rtol=0, atol=1e-3)
    assert_tensors_allclose(array_ng[:, 0], array_nc[:, 0], rtol=0, atol=1e-3)
    assert_tensors_allclose(array_ng[:, 0:1], array_nc[:, 0:1], rtol=0, atol=1e-3)
    # The CPU side used to be indexed with [-1:, 0:] here, unlike the GPU
    # side; both now use the identical index like every other check.
    assert_tensors_allclose(array_ng[-1, 0:], array_nc[-1, 0:], rtol=0, atol=1e-3)

    # write access: assign through an integer index, then compare everything
    array_ng[0] = 0
    array_nc[0] = 0

    assert_tensors_allclose(array_ng, array_nc, rtol=0, atol=1e-3)

    del(gpu)