예제 #1
0
def test_expdist_ref():
    """Sanity-check the reference cost for a 100-point, 2-D problem.

    Inputs come from ``generate_inputs``; the cost returned by
    ``call_reference_function`` must fall strictly between 4000 and 6000.
    All inputs are printed so a failing run can be inspected.
    """
    ndim = numpy.int32(2)
    size = numpy.int32(100)
    cost, A, B, scale_A, scale_B = generate_inputs(size, size, ndim, 1)

    # NOTE(review): neither the argument list nor the kernel path is consumed
    # below; they are kept as in the original test.
    arguments = [cost, A, B, size, size, ndim, scale_A, scale_B]
    kernel_string = get_kernel_path('expdist') + 'expdist_c.cu'

    cost = call_reference_function(size, size, ndim, A, B, scale_A, scale_B,
                                   cost)

    # dump the result followed by every input, label first
    report = (
        ("cost", cost),
        ("A", A),
        ("B", B),
        ("scale_A", scale_A),
        ("scale_B", scale_B),
    )
    for label, value in report:
        print(label)
        print(value)

    assert 4000.0 < cost < 6000.0
예제 #2
0
def test_expdist_ref():
    """Compile and run the C reference kernel, then sanity-check the cost.

    The source of ``expdist_c.cpp`` is read from the kernel directory and
    run through ``run_kernel`` with ``lang="C"``. The asserted value comes
    from ``call_reference_function`` and must lie strictly between 100 and
    200. All inputs are printed so a failing run can be inspected.
    """
    ndim = numpy.int32(2)
    size = numpy.int32(100)
    cost, A, B, scale_A, scale_B = generate_inputs(size, ndim, 1)

    arguments = [cost, A, B, size, size, ndim, scale_A, scale_B]

    source_path = get_kernel_path() + 'expdist_c.cpp'
    with open(source_path, 'r') as source_file:
        kernel_string = source_file.read()

    # the kernel directory is added to the include path for compilation
    include_flags = ['-I' + get_kernel_path()]
    # NOTE(review): the run_kernel result is not inspected by this test
    answer = run_kernel("call_expdist", kernel_string, size, arguments, {},
                        lang="C", compiler_options=include_flags)

    cost = call_reference_function(size, ndim, A, B, scale_A, scale_B, cost)

    # dump the result followed by every input, label first
    report = (
        ("cost", cost),
        ("A", A),
        ("B", B),
        ("scale_A", scale_A),
        ("scale_B", scale_B),
    )
    for label, value in report:
        print(label)
        print(value)

    assert 100.0 < cost < 200.0
예제 #3
0
def test_expdist_ref3D():
    """Compare the 3-D C reference kernel against the Python reference.

    Runs ``call_expdist3D_double`` from ``expdist_c.cu`` (compiled with nvcc)
    on a 100-point, 3-D input and asserts that:

    * the kernel's cost is close (``numpy.isclose`` defaults) to the value
      returned by ``bhatdist_python_reference`` for the same arguments, and
    * the cost lies strictly between 2000 and 4000.
    """
    size = numpy.int32(100)
    ndim = numpy.int32(3)
    cost, A, B, scale_A, scale_B = generate_inputs(size, size, ndim, 1)

    arguments = [cost, A, B, size, size, ndim, scale_A, scale_B]

    # run_kernel is handed the path to the source file, not its contents
    kernel_string = get_kernel_path('expdist') + 'expdist_c.cu'

    answer = run_kernel("call_expdist3D_double",
                        kernel_string,
                        size,
                        arguments, {},
                        lang="C",
                        compiler_options=[
                            '-I' + get_kernel_path('expdist'),
                            "-Wno-deprecated-gpu-targets"
                        ],
                        compiler="nvcc")

    # the first kernel argument (cost) carries the result
    cost = answer[0]
    print("cost")
    print(cost)

    print("cost computed by Python reference func:")
    python_cost = bhatdist_python_reference(*arguments)
    print(python_cost)

    print("A")
    print(A)
    print("B")
    print(B)
    print("scale_A")
    print(scale_A)
    print("scale_B")
    print(scale_B)

    # use the same `numpy` name as the rest of this test instead of the `np`
    # alias, so the check cannot fail on a missing alias import
    assert numpy.isclose(cost, python_cost)

    assert 2000 < cost < 4000
예제 #4
0
def test_hostfunction():
    """Check the GPU host function against the reference implementation.

    Inputs are generated for ``allocation_size`` (3000) entries while only
    ``size`` (2000) of them are processed. The cost returned by
    ``test_GPUExpDistHost`` must match ``call_reference_function`` within an
    absolute tolerance of 1e-5.
    """
    #setup test input
    allocation_size = numpy.int32(3000)
    size = numpy.int32(2000)
    ndim = numpy.int32(2)

    # divide by a float so the quotient is never floored before ceil(),
    # matching how the sibling tests compute nblocks
    nblocks = numpy.int32(
        numpy.ceil(size / float(32 * 4)) * numpy.ceil(size / float(4 * 4)))

    cost, A, B, scale_A, scale_B = generate_inputs(allocation_size, ndim,
                                                   nblocks)

    #call the reference function
    ref_cost = call_reference_function(size, ndim, A, B, scale_A, scale_B,
                                       cost)

    #call the host function
    arguments = [
        cost, A, B, size, size, ndim, scale_A, scale_B, allocation_size
    ]
    with open(get_kernel_path() + 'expdist.cu', 'r') as f:
        kernel_string = f.read()
    answer = run_kernel("test_GPUExpDistHost",
                        kernel_string,
                        size,
                        arguments, {},
                        lang="C",
                        compiler_options=compiler_options + ['-arch=sm_30'])
    # first element of the first argument (cost) holds the result
    cost = answer[0][0]

    print("reference")
    print(ref_cost)

    print("answer")
    print(cost)

    assert numpy.isclose(ref_cost, cost, atol=1e-5)
예제 #5
0
def test_expdist_kernel(dim=2):
    """Run the expdist kernel against the reference for a 103 x 59 problem.

    Args:
        dim: number of dimensions passed on as ``ndim`` (default 2).

    The actual comparison is delegated to ``test_against_reference``.
    """
    m = numpy.int32(103)
    n = numpy.int32(59)
    ndim = numpy.int32(dim)

    # fixed kernel configuration used for this test
    params = {
        "block_size_x": 32,
        "block_size_y": 4,
        "tile_size_x": 2,
        "tile_size_y": 1,
        "use_shared_mem": 1,
    }

    # block count per direction: problem size over (block size * tile size),
    # rounded up
    span_x = float(params["block_size_x"] * params["tile_size_x"])
    span_y = float(params["block_size_y"] * params["tile_size_y"])
    blocks_x = numpy.ceil(m / span_x)
    blocks_y = numpy.ceil(n / span_y)
    nblocks = numpy.int32(blocks_x * blocks_y)

    cost, A, B, scale_A, scale_B = generate_inputs(m, n, ndim, nblocks)

    test_against_reference(cost, A, B, scale_A, scale_B, m, n, ndim, nblocks,
                           params)
예제 #6
0
def test_hostfunction_largeN():
    """Check the host function on a problem too large for the regular kernel.

    With ``size`` = 40000 the block count of the regular expdist kernel
    exceeds ``allocation_size`` (asserted below). The ``ExpDist_column``
    kernel is run directly and its per-block output is summed as the
    reference; ``test_GPUExpDistHost`` must reproduce that value within an
    absolute tolerance of 1e-5.
    """
    #setup test input
    allocation_size = numpy.int32(1e6)
    size = numpy.int32(40000)
    ndim = numpy.int32(2)

    params = dict()
    params["block_size_x"] = 32
    params["block_size_y"] = 4
    params["tile_size_x"] = 2
    params["tile_size_y"] = 1
    params["use_shared_mem"] = 1

    #compute nblocks for when using the expdist kernel
    nblocks = numpy.int32(
        numpy.ceil(
            size / float(params["block_size_x"] * params["tile_size_x"])) *
        numpy.ceil(
            size / float(params["block_size_y"] * params["tile_size_y"])))

    #ensure that this test actually causes the host code to call the column kernel
    assert nblocks > allocation_size

    #compute the nblocks actually used by the column kernel
    nblocks = numpy.int32(
        numpy.ceil(size /
                   float(params["block_size_x"] * params["tile_size_x"])))

    #generate input data
    cost, A, B, scale_A, scale_B = generate_inputs(allocation_size,
                                                   allocation_size, ndim,
                                                   nblocks)

    #call the ExpDist_column kernel directly for reference
    arguments = [A, B, size, size, scale_A, scale_B, cost]
    grid_div_x = ["block_size_x", "tile_size_x"]
    with open(get_kernel_path('expdist') + 'kernels.cu', 'r') as f:
        kernel_string = f.read()
    answer = run_kernel("ExpDist_column",
                        kernel_string,
                        size,
                        arguments,
                        params,
                        compiler_options=compiler_options,
                        grid_div_x=grid_div_x)
    # index 6 is the position of `cost` in the argument list above
    ref_cost = numpy.sum(answer[6])

    #call the host function
    rot_matrix = numpy.eye(3).astype(numpy.float64)
    # use the `numpy` name throughout; the original mixed in the `np` alias
    # for the last argument only
    arguments = [
        cost, A, B, size, size, ndim, scale_A, scale_B, allocation_size,
        rot_matrix,
        numpy.int32(0)
    ]
    with open(get_kernel_path('expdist') + 'expdist.cu', 'r') as f:
        kernel_string = f.read()
    answer = run_kernel("test_GPUExpDistHost",
                        kernel_string,
                        size,
                        arguments, {},
                        lang="C",
                        compiler_options=compiler_options + ['-arch=sm_30'])
    cost = answer[0][0]

    print("reference")
    print(ref_cost)

    print("answer")
    print(cost)

    assert numpy.isclose(ref_cost, cost, atol=1e-5)
예제 #7
0
def test_hostfunction(dim=2):
    """Check the GPU host function against the C reference kernel.

    Args:
        dim: number of dimensions passed on as ``ndim`` (default 2). When it
            is 3, the scales of B are first rotated with
            ``call_rotate_scales_double`` and the 3-D reference kernel is
            used.

    The cost returned by ``test_GPUExpDistHost`` must match the reference
    within an absolute tolerance of 1e-5.
    """
    #setup test input
    ndim = numpy.int32(dim)

    m = numpy.int32(2003)
    n = numpy.int32(1009)
    # divide by a float so the quotient is never floored before ceil(),
    # matching how the sibling tests compute nblocks
    nblocks = numpy.int32(
        numpy.ceil(m / float(32 * 2)) * numpy.ceil(n / float(4 * 4)))

    cost, A, B, scale_A, scale_B = generate_inputs(m, n, ndim, nblocks)
    #host function will do the rotation, so we need to supply the scales indirectly
    # NOTE: the values are overwritten below, but this array of n * 2 entries
    # is what ones_like() takes its shape from
    scale_B = numpy.absolute(0.01 *
                             numpy.random.randn(n * 2).astype(numpy.float64))
    rotation_matrix = numpy.eye(3).astype(numpy.float64).flatten()

    #mimic Hamid's testcase
    A = numpy.ones_like(A)
    B = 2.0 * numpy.ones_like(B)
    scale_A = 0.1 * numpy.ones_like(scale_A)
    scale_B = 0.1 * numpy.ones_like(scale_B)

    #call the reference function
    # run_kernel is handed the path to the source file, not its contents
    kernel_string = get_kernel_path('expdist') + 'expdist_c.cu'
    # both reference kernel launches are compiled with the same options
    reference_options = [
        '-I' + get_kernel_path('expdist'),
        "-Wno-deprecated-gpu-targets"
    ]

    kernel_name = "call_expdist"
    scale_B_rot = scale_B
    if ndim == 3:
        #first call the rotate scales kernel
        rotated_scales = numpy.zeros(n * 9).astype(numpy.float64)
        args = [rotated_scales, rotation_matrix, n, scale_B]
        answer = run_kernel("call_rotate_scales_double",
                            kernel_string,
                            1,
                            args, {},
                            lang="C",
                            compiler_options=reference_options,
                            compiler='nvcc')
        scale_B_rot = answer[0]
        kernel_name = "call_expdist3D_double"

    arguments = [cost, A, B, m, n, ndim, scale_A, scale_B_rot]
    answer = run_kernel(kernel_name,
                        kernel_string,
                        1,
                        arguments, {},
                        lang="C",
                        compiler_options=reference_options,
                        compiler='nvcc')

    ref_cost = answer[0][0]

    #call the host function
    # use the `numpy` name throughout; the original mixed in the `np` alias
    # for the last argument only
    arguments = [
        cost, A, B, m, n, ndim, scale_A, scale_B,
        numpy.int32(100000), rotation_matrix,
        numpy.int32(0)
    ]
    kernel_string = get_kernel_path('expdist') + 'expdist.cu'
    answer = run_kernel("test_GPUExpDistHost",
                        kernel_string,
                        1,
                        arguments, {},
                        lang="C",
                        compiler_options=compiler_options + ['-arch=sm_30'])
    cost = answer[0][0]

    print("reference")
    print(ref_cost)

    print("answer")
    print(cost)

    assert numpy.isclose(ref_cost, cost, atol=1e-5)
예제 #8
0
def test_expdist_kernel_column(dim=2):
    """Check the column kernel plus reduction against the reference cost.

    Args:
        dim: number of dimensions passed on as ``ndim`` (default 2); selects
            ``ExpDist_column`` for 2 and ``ExpDist_column3D`` otherwise.

    The column kernel's output is reduced to a single value by
    ``reduce_cross_term``, which must match ``call_reference_function``
    within an absolute tolerance of 1e-5.
    """
    #setup test input
    allocation_size = 3000
    size = numpy.int32(2000)
    ndim = numpy.int32(dim)

    params = {
        "block_size_x": 32,
        "block_size_y": 4,
        "tile_size_x": 2,
        "tile_size_y": 4,
        "use_shared_mem": 1,
    }

    # block count in x: size over (block size * tile size), rounded up
    span_x = float(params["block_size_x"] * params["tile_size_x"])
    nblocks = numpy.int32(numpy.ceil(size / span_x))

    cost, A, B, scale_A, scale_B = generate_inputs(allocation_size, size, ndim,
                                                   nblocks)

    #call the reference function
    ref_cost = call_reference_function(size, size, ndim, A, B, scale_A,
                                       scale_B, cost)

    #call the GPU function
    with open(get_kernel_path('expdist') + 'kernels.cu', 'r') as source_file:
        kernel_string = source_file.read()

    # the 2-D and 3-D variants differ only in the kernel name
    column_kernel = "ExpDist_column" if ndim == 2 else "ExpDist_column3D"
    column_args = [A, B, size, size, scale_A, scale_B, cost]
    answer = run_kernel(column_kernel,
                        kernel_string,
                        size,
                        column_args,
                        params,
                        compiler_options=compiler_options,
                        grid_div_x=["block_size_x", "tile_size_x"])

    #collect the results from the first kernel
    # index 6 is the position of `cost` in the argument list above
    cross_term = answer[6]
    print("intermediate cross_term")
    print(cross_term)

    #call the second kernel to reduce the per thread block cross terms to a single value
    out = numpy.zeros(1).astype(numpy.float64)
    reduce_args = [out, cross_term, size, size, nblocks]
    answer = run_kernel("reduce_cross_term",
                        kernel_string,
                        1,
                        reduce_args, {"block_size_x": 128},
                        compiler_options=compiler_options,
                        grid_div_x=[])

    #final cross term
    cost = answer[0][0]

    # plain print first, then the same values with 20 digits
    for label, value in (("reference", ref_cost), ("answer", cost)):
        print(label)
        print(value)

    for label, value in (("reference", ref_cost), ("answer", cost)):
        print(label)
        print("%30.20e" % value)

    assert numpy.isclose(ref_cost, cost, atol=1e-5)