예제 #1
0
def test_gmem_access_counter_bitwise():
    """Check int32 load/store counts for a kernel built from bitwise operators."""
    kernel = lp.make_kernel(
            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
            [
                """
                c[i, j, k] = (a[i,j,k] | 1) + (b[i,j,k] & 1)
                e[i, k] = (g[i,k] ^ k)*(~h[i,k+1]) + (g[i, k] << (h[i,k] >> k))
                """
            ],
            name="bitwise", assumptions="n,m,l >= 1")
    kernel = lp.add_and_infer_dtypes(
            kernel,
            {"a": np.int32, "b": np.int32, "g": np.int32, "h": np.int32})

    access_counts = get_gmem_access_poly(kernel)

    # Concrete sizes at which the symbolic counts are evaluated.
    n, m, l = 512, 256, 128
    sizes = {'n': n, 'm': m, 'l': l}
    int32 = np.dtype(np.int32)

    loads = access_counts[(int32, 'uniform', 'load')].eval_with_dict(sizes)
    assert loads == 4*n*m+2*n*m*l

    stores = access_counts[(int32, 'uniform', 'store')].eval_with_dict(sizes)
    assert stores == n*m+n*m*l
예제 #2
0
def test_gmem_access_counter_logic():
    """Check load/store counts for a kernel whose instruction is an ``if`` expression."""
    kernel = lp.make_kernel(
            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
            [
                """
                e[i,k] = if(not(k<l-2) and k>6 or k/2==l, g[i,k]*2, g[i,k]+h[i,k]/2)
                """
            ],
            name="logic", assumptions="n,m,l >= 1")
    kernel = lp.add_and_infer_dtypes(kernel, {"g": np.float32, "h": np.float64})

    access_counts = get_gmem_access_poly(kernel)

    # Concrete sizes at which the symbolic counts are evaluated.
    n, m, l = 512, 256, 128
    sizes = {'n': n, 'm': m, 'l': l}
    float32 = np.dtype(np.float32)
    float64 = np.dtype(np.float64)

    f32_loads = access_counts[(float32, 'uniform', 'load')].eval_with_dict(sizes)
    f64_loads = access_counts[(float64, 'uniform', 'load')].eval_with_dict(sizes)
    assert f32_loads == 2*n*m
    assert f64_loads == n*m

    f64_stores = access_counts[(float64, 'uniform', 'store')].eval_with_dict(sizes)
    assert f64_stores == n*m
예제 #3
0
def test_all_counters_parallel_matmul():
    """Check barrier, operation and memory-access counts for a split, tagged matmul."""
    kernel = lp.make_kernel(
            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
            [
                "c[i, j] = sum(k, a[i, k]*b[k, j])"
            ],
            name="matmul", assumptions="n,m,l >= 1")
    kernel = lp.add_and_infer_dtypes(kernel, {"a": np.float32, "b": np.float32})
    kernel = lp.split_iname(kernel, "i", 16, outer_tag="g.0", inner_tag="l.1")
    kernel = lp.split_iname(kernel, "j", 16, outer_tag="g.1", inner_tag="l.0")

    # Concrete sizes at which the symbolic counts are evaluated.
    n, m, l = 512, 256, 128
    sizes = {'n': n, 'm': m, 'l': l}

    # Barriers.
    barriers = get_barrier_poly(kernel).eval_with_dict(sizes)
    assert barriers == 0

    # Arithmetic operations, keyed by (dtype, operation name).
    op_counts = get_op_poly(kernel)
    float32 = np.dtype(np.float32)
    f32_mul = op_counts[(float32, 'mul')].eval_with_dict(sizes)
    f32_add = op_counts[(float32, 'add')].eval_with_dict(sizes)
    int32 = np.dtype(np.int32)
    i32_add = op_counts[(int32, 'add')].eval_with_dict(sizes)
    i32_mul = op_counts[(int32, 'mul')].eval_with_dict(sizes)
    i32_total = i32_add + i32_mul

    assert f32_mul+f32_add == n*m*l*2
    assert i32_total == n*m*l*4 + l*n*4

    # Global memory accesses, keyed by (dtype, access pattern, direction).
    access_counts = get_gmem_access_poly(kernel)
    strided_loads = access_counts[
            (float32, 'nonconsecutive', 'load')].eval_with_dict(sizes)
    consec_loads = access_counts[
            (float32, 'consecutive', 'load')].eval_with_dict(sizes)

    assert strided_loads == n*m*l
    assert consec_loads == n*m*l

    consec_stores = access_counts[
            (float32, 'consecutive', 'store')].eval_with_dict(sizes)

    assert consec_stores == n*l
예제 #4
0
def test_gmem_access_counter_consec():
    """Check the 'consecutive' load/store counts after tagging the inames."""
    kernel = lp.make_kernel(
            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
            [
                """
            c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
            e[i, k] = g[i,k]*(2+h[i,k])
            """
            ],
            name="consec", assumptions="n,m,l >= 1")
    kernel = lp.add_and_infer_dtypes(
            kernel,
            {"a": np.float32, "b": np.float32, "g": np.float64, "h": np.float64})
    kernel = lp.tag_inames(kernel, {"k": "l.0", "i": "g.0", "j": "g.1"})

    access_counts = get_gmem_access_poly(kernel)

    # Concrete sizes at which the symbolic counts are evaluated.
    n, m, l = 512, 256, 128
    sizes = {'n': n, 'm': m, 'l': l}
    float64 = np.dtype(np.float64)
    float32 = np.dtype(np.float32)

    f64_loads = access_counts[(float64, 'consecutive', 'load')].eval_with_dict(sizes)
    f32_loads = access_counts[(float32, 'consecutive', 'load')].eval_with_dict(sizes)
    assert f64_loads == 2*n*m
    assert f32_loads == 3*n*m*l

    f64_stores = access_counts[(float64, 'consecutive', 'store')].eval_with_dict(sizes)
    f32_stores = access_counts[(float32, 'consecutive', 'store')].eval_with_dict(sizes)
    assert f64_stores == n*m
    assert f32_stores == n*m*l
예제 #5
0
def test_gmem_access_counter_specialops():
    """Check load/store counts for a kernel using the ``%`` and ``**`` operators."""
    kernel = lp.make_kernel(
            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
            [
                """
                c[i, j, k] = (2*a[i,j,k])%(2+b[i,j,k]/3.0)
                e[i, k] = (1+g[i,k])**(1+h[i,k+1])
                """
            ],
            name="specialops", assumptions="n,m,l >= 1")
    kernel = lp.add_and_infer_dtypes(
            kernel,
            {"a": np.float32, "b": np.float32, "g": np.float64, "h": np.float64})

    access_counts = get_gmem_access_poly(kernel)

    # Concrete sizes at which the symbolic counts are evaluated.
    n, m, l = 512, 256, 128
    sizes = {'n': n, 'm': m, 'l': l}
    float32 = np.dtype(np.float32)
    float64 = np.dtype(np.float64)

    f32_loads = access_counts[(float32, 'uniform', 'load')].eval_with_dict(sizes)
    f64_loads = access_counts[(float64, 'uniform', 'load')].eval_with_dict(sizes)
    assert f32_loads == 2*n*m*l
    assert f64_loads == 2*n*m

    f32_stores = access_counts[(float32, 'uniform', 'store')].eval_with_dict(sizes)
    f64_stores = access_counts[(float64, 'uniform', 'store')].eval_with_dict(sizes)
    assert f32_stores == n*m*l
    assert f64_stores == n*m
예제 #6
0
def test_gmem_access_counter_reduction():
    """Check float32 load/store counts for a matmul written as a ``sum`` reduction."""
    kernel = lp.make_kernel(
            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
            [
                "c[i, j] = sum(k, a[i, k]*b[k, j])"
            ],
            name="matmul", assumptions="n,m,l >= 1")
    kernel = lp.add_and_infer_dtypes(kernel, {"a": np.float32, "b": np.float32})

    access_counts = get_gmem_access_poly(kernel)

    # Concrete sizes at which the symbolic counts are evaluated.
    n, m, l = 512, 256, 128
    sizes = {'n': n, 'm': m, 'l': l}
    float32 = np.dtype(np.float32)

    loads = access_counts[(float32, 'uniform', 'load')].eval_with_dict(sizes)
    assert loads == 2*n*m*l

    stores = access_counts[(float32, 'uniform', 'store')].eval_with_dict(sizes)
    assert stores == n*l
예제 #7
0
def test_gmem_access_counter_basic():
    """Check load/store counts for a two-instruction float32/float64 kernel."""
    kernel = lp.make_kernel(
            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
            [
                """
                c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
                e[i, k] = g[i,k]*h[i,k+1]
                """
            ],
            name="basic", assumptions="n,m,l >= 1")
    kernel = lp.add_and_infer_dtypes(
            kernel,
            {"a": np.float32, "b": np.float32, "g": np.float64, "h": np.float64})

    access_counts = get_gmem_access_poly(kernel)

    # Concrete sizes at which the symbolic counts are evaluated.
    n, m, l = 512, 256, 128
    sizes = {'n': n, 'm': m, 'l': l}
    float32 = np.dtype(np.float32)
    float64 = np.dtype(np.float64)

    f32_loads = access_counts[(float32, 'uniform', 'load')].eval_with_dict(sizes)
    f64_loads = access_counts[(float64, 'uniform', 'load')].eval_with_dict(sizes)
    assert f32_loads == 3*n*m*l
    assert f64_loads == 2*n*m

    f32_stores = access_counts[(float32, 'uniform', 'store')].eval_with_dict(sizes)
    f64_stores = access_counts[(float64, 'uniform', 'store')].eval_with_dict(sizes)
    assert f32_stores == n*m*l
    assert f64_stores == n*m