Ejemplo n.º 1
0
def test_add_nosync():
    orig_knl = lp.make_kernel("{[i]: 0<=i<10}",
        """
        <>tmp[i] = 10 {id=insn1}
        <>tmp2[i] = 10 {id=insn2}

        <>tmp3[2*i] = 0 {id=insn3}
        <>tmp4 = 1 + tmp3[2*i] {id=insn4}

        <>tmp5[i] = 0 {id=insn5,groups=g1}
        tmp5[i] = 1 {id=insn6,conflicts=g1}
        """)

    orig_knl = lp.set_temporary_scope(orig_knl, "tmp3", "local")
    orig_knl = lp.set_temporary_scope(orig_knl, "tmp5", "local")

    # No dependency present - don't add nosync
    knl = lp.add_nosync(orig_knl, "any", "writes:tmp", "writes:tmp2",
            empty_ok=True)
    assert frozenset() == knl.id_to_insn["insn2"].no_sync_with

    # Dependency present
    knl = lp.add_nosync(orig_knl, "local", "writes:tmp3", "reads:tmp3")
    assert frozenset() == knl.id_to_insn["insn3"].no_sync_with
    assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with

    # Bidirectional
    knl = lp.add_nosync(
            orig_knl, "local", "writes:tmp3", "reads:tmp3", bidirectional=True)
    assert frozenset([("insn4", "local")]) == knl.id_to_insn["insn3"].no_sync_with
    assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with

    # Groups
    knl = lp.add_nosync(orig_knl, "local", "insn5", "insn6")
    assert frozenset([("insn5", "local")]) == knl.id_to_insn["insn6"].no_sync_with
Ejemplo n.º 2
0
def test_add_nosync():
    orig_knl = lp.make_kernel("{[i]: 0<=i<10}",
        """
        <>tmp[i] = 10 {id=insn1}
        <>tmp2[i] = 10 {id=insn2}

        <>tmp3[2*i] = 0 {id=insn3}
        <>tmp4 = 1 + tmp3[2*i] {id=insn4}

        <>tmp5[i] = 0 {id=insn5,groups=g1}
        tmp5[i] = 1 {id=insn6,conflicts=g1}
        """)

    orig_knl = lp.set_temporary_scope(orig_knl, "tmp3", "local")
    orig_knl = lp.set_temporary_scope(orig_knl, "tmp5", "local")

    # No dependency present - don't add nosync
    knl = lp.add_nosync(orig_knl, "any", "writes:tmp", "writes:tmp2",
            empty_ok=True)
    assert frozenset() == knl.id_to_insn["insn2"].no_sync_with

    # Dependency present
    knl = lp.add_nosync(orig_knl, "local", "writes:tmp3", "reads:tmp3")
    assert frozenset() == knl.id_to_insn["insn3"].no_sync_with
    assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with

    # Bidirectional
    knl = lp.add_nosync(
            orig_knl, "local", "writes:tmp3", "reads:tmp3", bidirectional=True)
    assert frozenset([("insn4", "local")]) == knl.id_to_insn["insn3"].no_sync_with
    assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with

    # Groups
    knl = lp.add_nosync(orig_knl, "local", "insn5", "insn6")
    assert frozenset([("insn5", "local")]) == knl.id_to_insn["insn6"].no_sync_with
Ejemplo n.º 3
0
def test_global_temporary(ctx_factory):
    ctx = ctx_factory()

    knl = lp.make_kernel(
        "{ [i]: 0<=i<n}", """
            <> c[i] = a[i + 1]
            out[i] = c[i]
            """)

    knl = lp.add_and_infer_dtypes(knl, {
        "a": np.float32,
        "c": np.float32,
        "out": np.float32,
        "n": np.int32
    })
    knl = lp.set_temporary_scope(knl, "c", "global")

    ref_knl = knl

    knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")

    cgr = lp.generate_code_v2(knl)

    assert len(cgr.device_programs) == 2

    #print(cgr.device_code())
    #print(cgr.host_code())

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))
Ejemplo n.º 4
0
def test_global_temporary(ctx_factory):
    ctx = ctx_factory()

    knl = lp.make_kernel(
            "{ [i]: 0<=i<n}",
            """
            <> c[i] = a[i + 1]
            out[i] = c[i]
            """)

    knl = lp.add_and_infer_dtypes(knl,
            {"a": np.float32, "c": np.float32, "out": np.float32, "n": np.int32})
    knl = lp.set_temporary_scope(knl, "c", "global")

    ref_knl = knl

    knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")

    cgr = lp.generate_code_v2(knl)

    assert len(cgr.device_programs) == 2

    #print(cgr.device_code())
    #print(cgr.host_code())

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))
Ejemplo n.º 5
0
def test_kernel_splitting_with_loop_and_private_temporary(ctx_factory):
    ctx = ctx_factory()

    pytest.xfail("spilling doesn't yet use local axes")

    knl = lp.make_kernel(
        "{ [i,k]: 0<=i<n and 0<=k<3 }", """
            <> t_private_scalar = a[k,i+1]
            <> t_private_array[i % 2] = a[k,i+1]
            c[k,i] = a[k,i+1]
            out[k,i] = c[k,i] + t_private_scalar + t_private_array[i % 2]
            """)

    knl = lp.add_and_infer_dtypes(knl, {
        "a": np.float32,
        "c": np.float32,
        "out": np.float32,
        "n": np.int32
    })
    knl = lp.set_temporary_scope(knl, "t_private_scalar", "private")
    knl = lp.set_temporary_scope(knl, "t_private_array", "private")

    ref_knl = knl

    knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")

    # schedule
    from loopy.preprocess import preprocess_kernel
    knl = preprocess_kernel(knl)

    from loopy.schedule import get_one_scheduled_kernel
    knl = get_one_scheduled_kernel(knl)

    # map schedule onto host or device
    print(knl)

    cgr = lp.generate_code_v2(knl)

    assert len(cgr.device_programs) == 2

    print(cgr.device_code())
    print(cgr.host_code())

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))