Example #1
def test_all_counters_parallel_matmul():

    knl = lp.make_kernel(
            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
            [
                "c[i, j] = sum(k, a[i, k]*b[k, j])"
            ],
            name="matmul", assumptions="n,m,l >= 1")
    knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, b=np.float32))
    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 16, outer_tag="g.1", inner_tag="l.0")

    n = 512
    m = 256
    l = 128
    params = {'n': n, 'm': m, 'l': l}

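    # no prefetching into local memory here, so no barriers are expected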
    barrier_count = get_barrier_poly(knl).eval_with_dict(params)
    assert barrier_count == 0

    op_map = get_op_poly(knl)
    f32mul = op_map[(np.dtype(np.float32), 'mul')].eval_with_dict(params)
    f32add = op_map[(np.dtype(np.float32), 'add')].eval_with_dict(params)
    i32ops = op_map[(np.dtype(np.int32), 'add')].eval_with_dict(params)
    i32ops += op_map[(np.dtype(np.int32), 'mul')].eval_with_dict(params)

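    # one f32 multiply and one f32 add per (i, k, j) iteration; the i32 ops
    # come from index arithmetic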
    assert f32mul+f32add == n*m*l*2
    assert i32ops == n*m*l*4 + l*n*4

    subscript_map = get_gmem_access_poly(knl)
    f32uncoal = subscript_map[
        (np.dtype(np.float32), 'nonconsecutive', 'load')
    ].eval_with_dict(params)
    f32coal = subscript_map[
        (np.dtype(np.float32), 'consecutive', 'load')
    ].eval_with_dict(params)

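    # along l.0 (j_inner), b[k, j] is unit-stride while a[i, k] is not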
    assert f32uncoal == n*m*l
    assert f32coal == n*m*l

    f32coal = subscript_map[
        (np.dtype(np.float32), 'consecutive', 'store')
    ].eval_with_dict(params)

    assert f32coal == n*l
Example #2
def test_barrier_counter_nobarriers():

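    # element-wise statements with no shared local data, so no barriers are
    # expected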
    knl = lp.make_kernel(
            "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
            [
                """
                c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
                e[i, k] = g[i,k]*h[i,k+1]
                """
            ],
            name="basic", assumptions="n,m,l >= 1")

    knl = lp.add_and_infer_dtypes(knl,
                        dict(a=np.float32, b=np.float32, g=np.float64, h=np.float64))
    poly = get_barrier_poly(knl)
    n = 512
    m = 256
    l = 128
    params = {'n': n, 'm': m, 'l': l}
    barrier_count = poly.eval_with_dict(params)
    assert barrier_count == 0
Example #3
def test_barrier_counter_barriers():

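    # the second statement reads c at k+1 and k-1 and depends on the first,
    # so synchronization is required between them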
    knl = lp.make_kernel(
            "[n,m,l] -> {[i,k,j]: 0<=i<50 and 1<=k<98 and 0<=j<10}",
            [
                """
                c[i,j,k] = 2*a[i,j,k] {id=first}
                e[i,j,k] = c[i,j,k+1]+c[i,j,k-1] {dep=first}
                """
            ], [
                lp.TemporaryVariable("c", lp.auto, shape=(50, 10, 99)),
                "..."
            ],
            name="weird2",
            )
    knl = lp.add_and_infer_dtypes(knl, dict(a=np.int32))
    knl = lp.split_iname(knl, "k", 128, outer_tag="g.0", inner_tag="l.0")
    poly = get_barrier_poly(knl)
    n = 512
    m = 256
    l = 128
    params = {'n': n, 'm': m, 'l': l}
    barrier_count = poly.eval_with_dict(params)
    assert barrier_count == 50*10*2
Example #4
        knl = ref_knl
        knl = lp.split_iname(knl, "i", BSIZEy, outer_tag="g.0", inner_tag="l.1")
        knl = lp.split_iname(knl, "j", BSIZEx, outer_tag="g.1", inner_tag="l.0")
        knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"])

        # check = lp.auto_test_vs_ref(ref_knl, ctx, knl, print_code=True)
        # print "Correctness check: \n", check

        # use ptx src to determine resource usage
        cknl = lp.compiled.CompiledKernel(ctx, knl)
        ptx_src = cknl.cl_kernel_info().cl_kernel.program.binaries[0]
        with open(knl.name + ".ptx", "w") as ptx_src_file:
            ptx_src_file.write(ptx_src)

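        # gather the model inputs: barrier count, 32-bit op counts, and
        # coalesced/uncoalesced DRAM access counts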
        barrier_poly = get_barrier_poly(knl)
        barrier_count = barrier_poly.eval_with_dict({"n": n})
        op_map = get_op_poly(knl)
        flops = op_map.get(np.dtype(np.float32), isl.PwQPolynomial("{ 0 }")).eval_with_dict({"n": n})
        iops = op_map.get(np.dtype(np.int32), isl.PwQPolynomial("{ 0 }")).eval_with_dict({"n": n})
        sub_map = get_DRAM_access_poly(knl)  # noqa

        f32coal_l = sub_map.get(
            (np.dtype(np.float32), "consecutive", "load"), isl.PwQPolynomial("{ 0 }")
        ).eval_with_dict({"n": n})
        f32coal_s = sub_map.get(
            (np.dtype(np.float32), "consecutive", "store"), isl.PwQPolynomial("{ 0 }")
        ).eval_with_dict({"n": n})
        f32coal = f32coal_l + f32coal_s
        # print "coalesced: %i, (stores: %i, loads: %i)" % (f32coal, f32coal_s, f32coal_l)
        f32uncoal_l = sub_map.get(
            (np.dtype(np.float32), "nonconsecutive", "load"), isl.PwQPolynomial("{ 0 }")
        ).eval_with_dict({"n": n})
Example #5
def run_empt_trials(ctx, queue, nvals, configs_t,
                    Atrain_all, Atest_all, ytrain_all, ytest_all,
                    actual_times_all, HK_predict_all, train_test_config):
    A = []
    HK_predict = []
    actual = []
    dtype = np.float32
    for n in nvals:
        knl = lp.make_kernel(
                "{[i,j]: 0<=i,j<%d}" % n,
                [
                    ""
                ],
                name="empty")

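        # the kernel body is empty, so every counter below should be zero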
        for BSIZEx, BSIZEy in configs_t:

            #check = lp.auto_test_vs_ref(ref_knl, ctx, knl, print_code=True)
            #print "Correctness check: \n", check

            # use ptx src to determine resource usage
            #ptx_dump(ctx, knl, n, BSIZEx, BSIZEy)

            params = {'n': n}
            barrier_poly = get_barrier_poly(knl)
            barrier_ct = barrier_poly.eval_with_dict(params)
            op_map = get_op_poly(knl)
            flops, iops = get_32b_ops(op_map, params)
            sub_map = get_DRAM_access_poly(knl)  # noqa
            f32coal_l, f32coal_s, f32uncoal_l, f32uncoal_s = get_DRAM_f32_accesses(
                                                                    sub_map, params)

            # execute
            #print "="*40+"TIMING RESULTS"
            print("running kernel...")
            #knl = lp.set_options(knl, write_cl=True, highlight_cl=True)

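            # time the kernel, discarding the first warmup_trials runs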
            trial_times = []
            for i in range(averaging_trials+warmup_trials):
                evt, out = knl(queue)
                evt.wait()
                trial_times.append((evt.profile.END - evt.profile.START)*1e-9)
            avg_time = np.average(trial_times[warmup_trials:])

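            # feed the hand-counted stats into the analytical model behind
            # HK_predict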
            gstats = GPUStats('TeslaK20')
            reg32_per_thread = 2
            shared_mem_per_block = 0
            total_blocks = math.ceil(n/BSIZEx)*math.ceil(n/BSIZEy)
            total_threads = total_blocks*BSIZEx*BSIZEy  # TODO unused
            # TODO actually increase threads/blocks but expect 0 result
            kstats = KernelStats(0, 0, 0, barrier_ct, reg32_per_thread,
                                 shared_mem_per_block)
            tconfig = ThreadConfig(BSIZEx*BSIZEy, total_blocks)
            model = PerfModel(gstats, kstats, tconfig,
                            np.dtype(dtype))
            cycles = model.compute_total_cycles()

            actual.append(avg_time)
            HK_predict.append(cycles/(gstats.sm_clock_freq*10**9))

            update_LS_matrix(A, flops, iops, f32coal_l, f32coal_s, f32uncoal_l,
                             f32uncoal_s, barrier_ct, total_blocks, n*n,
                             np.dtype(dtype).itemsize, model)

    update_lstsq_mats(Atrain_all, Atest_all, ytrain_all, ytest_all,
                      actual_times_all, HK_predict_all,
                      A, actual, HK_predict, train_test_config)
Example #6
def run_fd_trials(ctx, queue, nvals, configs_t,
                  Atrain_all, Atest_all, ytrain_all, ytest_all,
                  actual_times_all, HK_predict_all, train_test_config):
    A = []
    HK_predict = []
    actual = []
    dtype = np.float32

    for n in nvals:
        u_mat_dev = cl.clrandom.rand(queue, (n+2, n+2), dtype=dtype)
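        # result[i,j] combines u[i,j]**2 with a 5-point stencil of the padded
        # (n+2) x (n+2) input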
        knl = lp.make_kernel(
              "{[i,j]: 0<=i,j<n}",
              "result[i,j] = u[i, j]**2 + -1 + (-4)*u[i + 1, j + 1] \
                    + u[i + 1 + 1, j + 1] + u[i + 1 + -1, j + 1] \
                    + u[i + 1, j + 1 + 1] + u[i + 1, j + 1 + -1]",
              name="finite_diff")
        knl = lp.add_and_infer_dtypes(knl, {"u": dtype})
        ref_knl = knl

        for BSIZEx, BSIZEy in configs_t:
            knl = ref_knl
            knl = lp.split_iname(knl,
                    "i", BSIZEx, outer_tag="g.1", inner_tag="l.1")
            knl = lp.split_iname(knl,
                    "j", BSIZEy, outer_tag="g.0", inner_tag="l.0")
            knl = lp.add_prefetch(knl, "u",
                    ["i_inner", "j_inner"],
                    fetch_bounding_box=True)
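            # the bounding-box prefetch above pulls a (BSIZEx+2) x (BSIZEy+2)
            # halo tile into local memory (cf. shared_mem_per_block below)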

            #check = lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=n),
            #                            print_code=True)
            #print "Correctness check: \n", check

            # use ptx src to determine resource usage
            #ptx_dump(ctx, knl, n, BSIZEx, BSIZEy)

            params = {'n': n}
            barrier_poly = get_barrier_poly(knl)
            barrier_ct = barrier_poly.eval_with_dict(params)
            op_map = get_op_poly(knl)
            flops, iops = get_32b_ops(op_map, params)
            sub_map = get_DRAM_access_poly(knl)  # noqa
            f32coal_l, f32coal_s, f32uncoal_l, f32uncoal_s = get_DRAM_f32_accesses(
                                                                    sub_map, params)
            f32coal = f32coal_l + f32coal_s
            f32uncoal = f32uncoal_l + f32uncoal_s

            # execute
            #print "="*40+"TIMING RESULTS"
            print("running kernel...")
            #knl = lp.set_options(knl, write_cl=True, highlight_cl=True)

            trial_times = []
            for i in range(averaging_trials+warmup_trials):
                evt, (out,) = knl(queue, u=u_mat_dev)
                evt.wait()
                trial_times.append((evt.profile.END - evt.profile.START)*1e-9)
            avg_time = np.average(trial_times[warmup_trials:])

            gstats = GPUStats('TeslaK20')
            if n % BSIZEx == 0 and n % BSIZEy == 0:
                reg32_per_thread = 14
            else:
                reg32_per_thread = 16

            shared_mem_per_block = 4*(BSIZEx+2)*(BSIZEy+2)
            total_blocks = math.ceil(n/BSIZEx)*math.ceil(n/BSIZEy)
            total_threads = total_blocks*BSIZEx*BSIZEy  # TODO unused
            kstats = KernelStats(flops/(n*n), f32uncoal/(n*n), f32coal/(n*n),
                                 barrier_ct, reg32_per_thread, shared_mem_per_block)
            tconfig = ThreadConfig(BSIZEx*BSIZEy, total_blocks)
            model = PerfModel(gstats, kstats, tconfig,
                            np.dtype(dtype))
            cycles = model.compute_total_cycles()

            actual.append(avg_time)
            HK_predict.append(cycles/(gstats.sm_clock_freq*10**9))

            update_LS_matrix(A, flops, iops, f32coal_l, f32coal_s, f32uncoal_l,
                             f32uncoal_s, barrier_ct, total_blocks, n*n,
                             np.dtype(dtype).itemsize, model)

    update_lstsq_mats(Atrain_all, Atest_all, ytrain_all, ytest_all,
                      actual_times_all, HK_predict_all,
                      A, actual, HK_predict, train_test_config)
Example #7
def run_conv_trials(ctx, queue, nvals, configs_t,
                    Atrain_all, Atest_all, ytrain_all, ytest_all,
                    actual_times_all, HK_predict_all, train_test_config):
    A = []
    HK_predict = []
    actual = []
    dtype = np.float32
    ncolors = 3
    for n in nvals:
        knl = lp.make_kernel(
            "{ [iimg, ifeat, icolor, im_x, im_y, f_x, f_y]: \
                -f_w <= f_x,f_y <= f_w \
                and 0 <= im_x < im_w and 0 <= im_y < im_h \
                and 0<=iimg<=nimgs and 0<=ifeat<nfeats and 0<=icolor<ncolors \
             }",
            """
            out[iimg, ifeat, im_x, im_y] = sum((f_x, f_y, icolor), \
                img[iimg, f_w+im_x-f_x, f_w+im_y-f_y, icolor] \
                * f[ifeat, f_w+f_x, f_w+f_y, icolor])
            """,
            [
                lp.GlobalArg("f", dtype, shape=lp.auto),
                lp.GlobalArg("img", dtype, shape=lp.auto),
                lp.GlobalArg("out", dtype, shape=lp.auto),
                "..."
            ],
            assumptions="f_w>=1 and im_w, im_h >= 2*f_w+1 and nfeats>=1 and nimgs>=0",
            flags="annotate_inames",
            defines=dict(ncolors=ncolors),
            name="conv")

        f_w = 3
        knl = lp.fix_parameters(knl, f_w=f_w)
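        # fixing f_w makes the filter footprint a compile-time constant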
        ref_knl = knl

        for BSIZEx, BSIZEy in configs_t:
            knl = ref_knl
            im_w = n
            im_h = n
            nfeats = 3
            nimgs = 3
            f_dev = cl.clrandom.rand(queue, (nfeats, 2*f_w+1, 2*f_w+1, ncolors),
                                     dtype=dtype)
            img_dev = cl.clrandom.rand(queue, (nimgs+1, n+2*f_w, n+2*f_w, ncolors),
                                       dtype=dtype)

            knl = lp.split_iname(knl, "im_x", BSIZEx,
                                 outer_tag="g.0", inner_tag="l.0")
            knl = lp.split_iname(knl, "im_y", BSIZEy,
                                 outer_tag="g.1", inner_tag="l.1")
            knl = lp.tag_inames(knl, dict(ifeat="g.2"))
            knl = lp.add_prefetch(knl, "f[ifeat,:,:,:]")
            knl = lp.add_prefetch(knl, "img", "im_x_inner, im_y_inner, f_x, f_y")
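            # the two prefetches above stage the filter and the haloed image
            # tile in local memory (cf. shared_mem_per_block below)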

            params = dict(im_w=im_w, im_h=im_h, f_w=f_w, nfeats=nfeats, nimgs=nimgs)

            #check = lp.auto_test_vs_ref(ref_knl, ctx, knl, print_code=True,
            #                            parameters=params)
            #print "Correctness check: \n", check
            # use ptx src to determine resource usage
            #ptx_dump(ctx, knl, n, BSIZEx, BSIZEy)

            barrier_poly = get_barrier_poly(knl)
            barrier_ct = barrier_poly.eval_with_dict(params)
            op_map = get_op_poly(knl)
            flops, iops = get_32b_ops(op_map, params)
            #TODO why do blk sizes that don't fit perfectly increase total flops/iops
            sub_map = get_DRAM_access_poly(knl)  # noqa
            f32coal_l, f32coal_s, f32uncoal_l, f32uncoal_s = get_DRAM_f32_accesses(
                                                                    sub_map, params)
            f32coal = f32coal_l + f32coal_s
            f32uncoal = f32uncoal_l + f32uncoal_s

            # execute
            print("running kernel...")
            #knl = lp.set_options(knl, write_cl=True, highlight_cl=True)

            trial_times = []
            for i in range(averaging_trials+warmup_trials):
                evt, (out,) = knl(queue, f=f_dev, img=img_dev, im_w=im_w, im_h=im_h,
                                  nfeats=nfeats, nimgs=nimgs)
                evt.wait()
                trial_times.append((evt.profile.END - evt.profile.START)*1e-9)
            avg_time = np.average(trial_times[warmup_trials:])

            gstats = GPUStats('TeslaK20')
            reg32_per_thread = 33
            shared_mem_per_block = (ncolors * (f_w*2+1) * (f_w*2+1) +
                                    (BSIZEx+f_w*2) * (BSIZEy+f_w*2)
                                    ) * np.dtype(dtype).itemsize
            total_blocks = math.ceil(n/BSIZEx)*math.ceil(n/BSIZEy)
            total_threads = total_blocks*BSIZEx*BSIZEy  # TODO unused
            kstats = KernelStats(flops/(n*n), f32uncoal/(n*n), f32coal/(n*n),
                                 barrier_ct, reg32_per_thread, shared_mem_per_block)
            tconfig = ThreadConfig(BSIZEx*BSIZEy, total_blocks)
            model = PerfModel(gstats, kstats, tconfig, np.dtype(dtype))
            cycles = model.compute_total_cycles()

            actual.append(avg_time)
            HK_predict.append(cycles/(gstats.sm_clock_freq*10**9))
            update_LS_matrix(A, flops, iops, f32coal_l, f32coal_s, f32uncoal_l,
                             f32uncoal_s, barrier_ct, total_blocks, n*n,
                             np.dtype(dtype).itemsize, model)
            #TODO try total_threads for n*n

    update_lstsq_mats(Atrain_all, Atest_all, ytrain_all, ytest_all,
                      actual_times_all, HK_predict_all,
                      A, actual, HK_predict, train_test_config)
Example #8
def run_tp_trials(ctx, queue, nvals, configs_t,
                  Atrain_all, Atest_all, ytrain_all, ytest_all, actual_times_all,
                  HK_predict_all, train_test_config, prefetch=True):
    A = []
    HK_predict = []
    actual = []
    dtype = np.float32
    for n in nvals:
        a_mat_dev = cl.clrandom.rand(queue, (n, n), dtype=dtype)
        b_mat_dev = cl.clrandom.rand(queue, (n, n), dtype=dtype)
        order = "C"
        knl = lp.make_kernel(
                "{[i,j]: 0<=i,j<%d}" % n,
                [
                    "b[i, j] = a[j, i]"
                ], [
                    lp.GlobalArg("a", dtype, shape=(n, n), order=order),
                    lp.GlobalArg("b", dtype, shape=(n, n), order=order),
                ],
                name="transpose")
        ref_knl = knl

        for BSIZEx, BSIZEy in configs_t:
            knl = ref_knl
            knl = lp.split_iname(knl, "i", BSIZEy, outer_tag="g.0", inner_tag="l.1")
            knl = lp.split_iname(knl, "j", BSIZEx, outer_tag="g.1", inner_tag="l.0")
            if prefetch:
                knl = lp.add_prefetch(knl, 'a', ["i_inner", "j_inner"])
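                # staging a through a local tile lets both the load and the
                # transposed store stay coalesced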

            #check = lp.auto_test_vs_ref(ref_knl, ctx, knl, print_code=True)
            #print "Correctness check: \n", check

            # use ptx src to determine resource usage
            #ptx_dump(ctx, knl, n, BSIZEx, BSIZEy)

            barrier_poly = get_barrier_poly(knl)
            barrier_ct = barrier_poly.eval_with_dict({'n': n})
            op_map = get_op_poly(knl)
            flops, iops = get_32b_ops(op_map, {'n': n})
            sub_map = get_DRAM_access_poly(knl)  # noqa
            f32coal_l, f32coal_s, f32uncoal_l, f32uncoal_s = get_DRAM_f32_accesses(
                                                                  sub_map, {'n': n})
            f32coal = f32coal_l + f32coal_s
            f32uncoal = f32uncoal_l + f32uncoal_s
            # execute
            #print "="*40+"TIMING RESULTS"
            print("running kernel...")
            #knl = lp.set_options(knl, write_cl=True, highlight_cl=True)
            #if not prefetch:
            #    knl = lp.set_options(knl, write_cl=True, highlight_cl=True)

            trial_times = []
            for i in range(averaging_trials+warmup_trials):
                evt, (out,) = knl(queue, a=a_mat_dev, b=b_mat_dev)
                evt.wait()
                trial_times.append((evt.profile.END - evt.profile.START)*1e-9)
            avg_time = np.average(trial_times[warmup_trials:])
            #if not prefetch:
            #    1/0
            gstats = GPUStats('TeslaK20')
            if n % BSIZEx == 0 and n % BSIZEy == 0:
                if prefetch:
                    reg32_per_thread = 10
                else:
                    reg32_per_thread = 8
            else:
                if prefetch:
                    reg32_per_thread = 8
                else:
                    reg32_per_thread = 9

            if prefetch:
                shared_mem_per_block = 4*BSIZEx*BSIZEy
            else:
                shared_mem_per_block = 0
            # TODO why is HK way off on the non-prefetch version?
            total_blocks = math.ceil(n/BSIZEx)*math.ceil(n/BSIZEy)
            total_threads = total_blocks*BSIZEx*BSIZEy  # TODO unused
            kstats = KernelStats(flops/(n*n), f32uncoal/(n*n), f32coal/(n*n),
                                 barrier_ct, reg32_per_thread, shared_mem_per_block)
            tconfig = ThreadConfig(BSIZEx*BSIZEy, total_blocks)
            model = PerfModel(gstats, kstats, tconfig,
                            np.dtype(dtype))
            cycles = model.compute_total_cycles()

            actual.append(avg_time)
            HK_predict.append(cycles/(gstats.sm_clock_freq*10**9))

            #update_LS_matrix(A, flops, f32coal_l, f32coal_s, f32uncoal_l,
            update_LS_matrix(A, flops, iops, f32coal_l, f32coal_s, f32uncoal_l,
                             f32uncoal_s, barrier_ct, total_blocks, n*n,
                             np.dtype(dtype).itemsize, model)

    update_lstsq_mats(Atrain_all, Atest_all, ytrain_all, ytest_all,
                      actual_times_all, HK_predict_all,
                      A, actual, HK_predict, train_test_config)
Example #9
def run_axpy_trials(ctx, queue, nvals, configs_t,
                    Atrain_all, Atest_all, ytrain_all, ytest_all,
                    actual_times_all, HK_predict_all, train_test_config):
    A = []
    HK_predict = []
    actual = []
    dtype = np.float32

    #TODO figure out smem usage issue
    for n in nvals:
        x_vec_dev = cl.clrandom.rand(queue, n, dtype=dtype)
        y_vec_dev = cl.clrandom.rand(queue, n, dtype=dtype)
        z_vec_dev = cl.clrandom.rand(queue, n, dtype=dtype)
        knl = lp.make_kernel(
            "[n] -> {[i]: 0<=i<%d}" % n,
            [
                "z[i] = 5.0*x[i]+7.0*y[i]"
            ], [
                lp.GlobalArg("x", dtype, shape=n),
                lp.GlobalArg("y", dtype, shape=n),
                lp.GlobalArg("z", dtype, shape=n),
            ], name="axpy")
        ref_knl = knl

        for BSIZEx, BSIZEy in configs_t:
            knl = ref_knl
            unroll = 4
            knl = lp.split_iname(knl, "i", unroll*BSIZEx,
                 outer_tag="g.0", slabs=(0, 1))
            knl = lp.split_iname(knl, "i_inner", BSIZEx,
                 outer_tag="unr", inner_tag="l.0")
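            # each work-item now handles `unroll` elements via the unr-tagged
            # loop, strided by BSIZEx along l.0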

            #check = lp.auto_test_vs_ref(ref_knl, ctx, knl, print_code=False)
            #print "Correctness check: \n", check

            # use ptx src to determine resource usage
            #ptx_dump(ctx, knl, n, BSIZEx, BSIZEy)

            barrier_poly = get_barrier_poly(knl)
            barrier_ct = barrier_poly.eval_with_dict({'n': n})
            op_map = get_op_poly(knl)
            flops, iops = get_32b_ops(op_map, {'n': n})
            sub_map = get_DRAM_access_poly(knl)  # noqa
            f32coal_l, f32coal_s, f32uncoal_l, f32uncoal_s = get_DRAM_f32_accesses(
                                                                  sub_map, {'n': n})
            f32coal = f32coal_l + f32coal_s
            f32uncoal = f32uncoal_l + f32uncoal_s

            '''
            print_ptx_src_msg(knl.name)

            print "="*40+"KERNEL STATS"
            print "barrier count: ", barrier_ct
            print "flops: ", flops
            print(sub_map)
            print "="*40
            '''

            # execute
            print("running kernel...")
            #knl = lp.set_options(knl, write_cl=True, highlight_cl=True)

            trial_times = []
            for i in range(averaging_trials+warmup_trials):
                evt, (out,) = knl(queue, x=x_vec_dev, y=y_vec_dev, z=z_vec_dev)
                evt.wait()
                trial_times.append((evt.profile.END - evt.profile.START)*1e-9)
            avg_time = np.average(trial_times[warmup_trials:])

            gstats = GPUStats('TeslaK20')
            reg32_per_thread = 20
            shared_mem_per_block = 0
            total_blocks = math.ceil(n/(BSIZEx*unroll))
            kstats = KernelStats(flops*unroll/n, f32uncoal*unroll/n,
                                 f32coal*unroll/n, barrier_ct, reg32_per_thread,
                                 shared_mem_per_block)
            tconfig = ThreadConfig(BSIZEx*BSIZEy, total_blocks)
            model = PerfModel(gstats, kstats, tconfig, np.dtype(dtype))
            cycles = model.compute_total_cycles()

            actual.append(avg_time)
            HK_predict.append(cycles/(gstats.sm_clock_freq*10**9))

            update_LS_matrix(A, flops, iops, f32coal_l, f32coal_s, f32uncoal_l,
                             f32uncoal_s, barrier_ct, total_blocks, n/unroll,
                             np.dtype(dtype).itemsize, model)

    update_lstsq_mats(Atrain_all, Atest_all, ytrain_all, ytest_all,
                      actual_times_all, HK_predict_all,
                      A, actual, HK_predict, train_test_config)
Example #10
def run_varyflops_trials(ctx, queue, nvals, configs_t,
                  Atrain_all, Atest_all, ytrain_all, ytest_all,
                  actual_times_all, HK_predict_all, train_test_config):
    A = []
    HK_predict = []
    actual = []
    dtype = np.float32

    #TODO figure out smem usage issue
    for n in nvals:
        a_mat_dev = cl.clrandom.rand(queue, (n, n, n), dtype=dtype)
        b_mat_dev = cl.clrandom.rand(queue, (n, n, n), dtype=dtype)
        g_mat_dev = cl.clrandom.rand(queue, (n, n, n), dtype=dtype)
        h_mat_dev = cl.clrandom.rand(queue, (n, n, n+1), dtype=dtype)

        knl = lp.make_kernel(
                "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
                [
                    """
                    c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
                    """
                ],
                name="basic", assumptions="n,m,l >= 1")
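        # the commented-out variants below vary the per-element flop count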
        '''
        knl = lp.make_kernel(
                "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
                [
                    """
                    c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0+a[i,j,k]
                    e[i, j, k+1] = g[i,j,k]*h[i,j,k+1]
                    """
                ],
                name="basic", assumptions="n,m,l >= 1")
        knl = lp.make_kernel(
                "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
                [
                    """
                    c[i, j, k] = a[i,j,k]*b[i,j,k]/3.0
                    e[i, j, k+1] = g[i,j,k]*h[i,j,k+1]
                    """
                ],
                name="basic", assumptions="n,m,l >= 1")
        '''
        #knl = lp.add_and_infer_dtypes(knl,
        #                    dict(a=dtype, b=dtype, g=dtype, h=dtype))
        knl = lp.add_and_infer_dtypes(knl,
                            dict(a=dtype, b=dtype))
        ref_knl = knl

        for BSIZEx, BSIZEy in configs_t:
            knl = ref_knl
            knl = lp.split_iname(knl, "i", BSIZEy,
                                 outer_tag="g.0", inner_tag="l.1")
            knl = lp.split_iname(knl, "j", BSIZEx,
                                 outer_tag="g.1", inner_tag="l.0")

            params = dict(n=n, m=n, l=n)
            check = lp.auto_test_vs_ref(ref_knl, ctx, knl, print_code=True,
                                        parameters=params)
            #print "Correctness check: \n", check
            # use ptx src to determine resource usage
            #ptx_dump(ctx, knl, n, BSIZEx, BSIZEy)
            barrier_poly = get_barrier_poly(knl)
            barrier_ct = barrier_poly.eval_with_dict(params)
            op_map = get_op_poly(knl)
            flops, iops = get_32b_ops(op_map, params)
            sub_map = get_DRAM_access_poly(knl)  # noqa
            f32coal_l, f32coal_s, f32uncoal_l, f32uncoal_s = get_DRAM_f32_accesses(
                                                                  sub_map, params)
            f32coal = f32coal_l + f32coal_s
            f32uncoal = f32uncoal_l + f32uncoal_s
            #print(sub_map)
            #print(f32coal/(n*n), f32uncoal/(n*n))
            print(knl)
            print(f32coal/(n*n), f32uncoal/(n*n))
            1/0  # deliberate crash: stop after printing the access counts
            '''
            print_ptx_src_msg(knl.name)
            print "="*40+"KERNEL STATS"
            print "barrier count: ", barrier_ct
            print "flops: ", flops
            print(sub_map)
            print "="*40
            '''

            # execute
            #print "="*40+"TIMING RESULTS"
            print("running kernel...")
            #knl = lp.set_options(knl, write_cl=True, highlight_cl=True)

            trial_times = []
            for i in range(averaging_trials+warmup_trials):
                #evt, out = knl(queue, a=a_mat_dev, b=b_mat_dev,
                #                  g=g_mat_dev, h=h_mat_dev)
                evt, out = knl(queue, a=a_mat_dev, b=b_mat_dev)
                evt.wait()
                trial_times.append((evt.profile.END - evt.profile.START)*1e-9)
            avg_time = np.average(trial_times[warmup_trials:])

            gstats = GPUStats('TeslaK20')
            '''
            if BSIZEx == 8 or BSIZEx == 32:  # TODO fix hack
                reg32_per_thread = 25
            elif BSIZEx == 24:
                reg32_per_thread = 18
            elif BSIZEx == 16:
                reg32_per_thread = 22
            '''
            reg32_per_thread = 18

            shared_mem_per_block = 0
            total_blocks = math.ceil(n/BSIZEx)*math.ceil(n/BSIZEy)
            total_threads = total_blocks*BSIZEx*BSIZEy  # TODO never used
            kstats = KernelStats(flops/(n*n), f32uncoal/(n*n), f32coal/(n*n),
                                 barrier_ct, reg32_per_thread, shared_mem_per_block)
            tconfig = ThreadConfig(BSIZEx*BSIZEy, total_blocks)
            model = PerfModel(gstats, kstats, tconfig,
                            np.dtype(dtype))
            cycles = model.compute_total_cycles()
            actual.append(avg_time)
            HK_predict.append(cycles/(gstats.sm_clock_freq*10**9))

            '''
            print "actual runtime: ", actual[-1]
            print "total predicted time: ", predicted[-1]
            print "total predicted execution cycles: ", cycles
            print "="*40
            '''
            update_LS_matrix(A, flops, iops, f32coal_l, f32coal_s, f32uncoal_l,
                             f32uncoal_s, barrier_ct, total_blocks, n*n,
                             np.dtype(dtype).itemsize, model)

    update_lstsq_mats(Atrain_all, Atest_all, ytrain_all, ytest_all,
                      actual_times_all, HK_predict_all,
                      A, actual, HK_predict, train_test_config)
Example #11
def run_mm_trials(ctx, queue, nvals, configs_t,
                  Atrain_all, Atest_all, ytrain_all, ytest_all,
                  actual_times_all, HK_predict_all, train_test_config, version):
    A = []
    HK_predict = []
    actual = []
    dtype = np.float32

    #TODO figure out smem usage issue
    for n in nvals:
        a_mat_dev = cl.clrandom.rand(queue, (n, n), dtype=dtype)
        b_mat_dev = cl.clrandom.rand(queue, (n, n), dtype=dtype)
        c_mat_dev = cl.clrandom.rand(queue, (n, n), dtype=dtype)
        order = "C"
        knl = lp.make_kernel(
            "{[i,j,k]: 0<=i,j,k<%d}" % n,
            [
                "c[i, j] = sum(k, a[i, k]*b[k, j])"
            ], [
                lp.GlobalArg("a", dtype, shape=(n, n), order=order),
                lp.GlobalArg("b", dtype, shape=(n, n), order=order),
                lp.GlobalArg("c", dtype, shape=(n, n), order=order),
            ], name="matmul")
        ref_knl = knl

        for BSIZEx, BSIZEy in configs_t:
            knl = ref_knl
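            # "allcoal" tags i/j so global accesses stay coalesced;
            # "partcoal" swaps the local axes, uncoalescing some accesses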
            if version == "allcoal":
                knl = lp.split_iname(knl, "i", BSIZEy,
                                     outer_tag="g.0", inner_tag="l.1")
                knl = lp.split_iname(knl, "j", BSIZEx,
                                     outer_tag="g.1", inner_tag="l.0")
            elif version == "partcoal":
                knl = lp.split_iname(knl, "i", BSIZEy,
                                     outer_tag="g.0", inner_tag="l.0")
                knl = lp.split_iname(knl, "j", BSIZEx,
                                     outer_tag="g.1", inner_tag="l.1")
            else:
                raise ValueError("unknown version: %s" % version)
            ksplit = BSIZEy
            knl = lp.split_iname(knl, "k", ksplit)
            knl = lp.add_prefetch(knl, "a", ["k_inner", "i_inner"])
            knl = lp.add_prefetch(knl, "b", ["j_inner", "k_inner", ])
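            # the a/b tile prefetches above introduce barriers and use
            # 4*ksplit*(BSIZEx+BSIZEy) bytes of local memory per block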

            #check = lp.auto_test_vs_ref(ref_knl, ctx, knl, print_code=True)
            #print "Correctness check: \n", check
            # use ptx src to determine resource usage

            #ptx_dump(ctx, knl, n, BSIZEx, BSIZEy)

            params = {'n': n}
            barrier_poly = get_barrier_poly(knl)
            barrier_ct = barrier_poly.eval_with_dict(params)
            op_map = get_op_poly(knl)
            op_map2 = get_op_poly2(knl)
            flops, iops = get_32b_ops(op_map, params)
            amd_op32 = get_32b_amd_ops(op_map2, params)
            other_op32 = get_32b_ops_all(op_map2, params) - sum(amd_op32)
            if flops + iops != sum(amd_op32) + other_op32: #TODO remove after debug
                print("<debug> PROBLEM! ops don't add up: ",
                        flops, iops, sum(amd_op32), other_op32)
            sub_map = get_DRAM_access_poly(knl)  # noqa
            f32coal_l, f32coal_s, f32uncoal_l, f32uncoal_s = get_DRAM_f32_accesses(
                                                                  sub_map, params)
            f32coal = f32coal_l + f32coal_s
            f32uncoal = f32uncoal_l + f32uncoal_s

            '''
            print_ptx_src_msg(knl.name)
            print "="*40+"KERNEL STATS"
            print "barrier count: ", barrier_ct
            print "flops: ", flops
            print(sub_map)
            print "="*40
            '''

            # execute
            #print "="*40+"TIMING RESULTS"
            print("running kernel...")
            #knl = lp.set_options(knl, write_cl=True, highlight_cl=True)

            trial_times = []
            for i in range(averaging_trials+warmup_trials):
                evt, (out,) = knl(queue, a=a_mat_dev, b=b_mat_dev, c=c_mat_dev)
                evt.wait()
                trial_times.append((evt.profile.END - evt.profile.START)*1e-9)
            avg_time = np.average(trial_times[warmup_trials:])

            gstats = GPUStats('TeslaC2070')
            ''' for k20:
            if BSIZEx == 8 or BSIZEx == 32:  # TODO fix hack
                reg32_per_thread = 25
            elif BSIZEx == 24:
                reg32_per_thread = 18
            elif BSIZEx == 16:
                reg32_per_thread = 22
            '''
            # for C2070
            if BSIZEx == 8 or BSIZEx == 16:  # TODO fix hack
                reg32_per_thread = 20
            elif BSIZEx == 32:
                reg32_per_thread = 19
            elif BSIZEx == 24:
                reg32_per_thread = 12
            #reg32_per_thread = 1 #estimate_regs_per_thread(knl)
            #print(reg32_per_thread, estimate_regs_per_thread(knl))
            reg32_per_thread = estimate_regs_per_thread(knl)

            shared_mem_per_block = 4*ksplit*(BSIZEx+BSIZEy)
            total_blocks = math.ceil(n/BSIZEx)*math.ceil(n/BSIZEy)
            total_threads = total_blocks*BSIZEx*BSIZEy  # TODO never used
            kstats = KernelStats(flops/(n*n), f32uncoal/(n*n), f32coal/(n*n),
                                 barrier_ct, reg32_per_thread, shared_mem_per_block)
            tconfig = ThreadConfig(BSIZEx*BSIZEy, total_blocks)
            model = PerfModel(gstats, kstats, tconfig,
                            np.dtype(dtype))
            cycles = model.compute_total_cycles()
            actual.append(avg_time)
            #for time in trial_times: #!!!!!
            #    actual.append(time)
            HK_predict.append(cycles/(gstats.sm_clock_freq*10**9))

            '''
            print "actual runtime: ", actual[-1]
            print "total predicted time: ", predicted[-1]
            print "total predicted execution cycles: ", cycles
            print "="*40
            '''
            #''' #!!!!!
            '''
            update_LS_matrix(A, flops, iops, f32coal_l, f32coal_s, f32uncoal_l,
                             f32uncoal_s, barrier_ct, total_blocks, n*n,
                             np.dtype(dtype).itemsize, model)
            '''
            ops = copy.deepcopy(amd_op32)
            ops.append(other_op32)
            update_LS_matrix2(A, ops, f32coal_l, f32coal_s, f32uncoal_l,
                             f32uncoal_s, barrier_ct, total_blocks, n*n,
                             np.dtype(dtype).itemsize, model)

            '''
            for time in trial_times:
                update_LS_matrix(A, flops, iops, f32coal_l, f32coal_s, f32uncoal_l,
                                 f32uncoal_s, barrier_ct, total_blocks, n*n,
                                 np.dtype(dtype).itemsize, model)
            '''

    update_lstsq_mats(Atrain_all, Atest_all, ytrain_all, ytest_all,
                      actual_times_all, HK_predict_all,
                      A, actual, HK_predict, train_test_config)