Example #1
0
def test_stride_depending_on_args(ctx_factory):
    """Exercise callee calls whose array arguments are strided sub-array refs.

    ``twice`` is called on ``x[2*i2, i3]`` and ``thrice`` on ``x[2*i6+1, i7]``.
    The merged program is passed to ``lp.auto_test_vs_ref`` as both the test
    and the reference kernel, with ``N = 4``.
    """
    cl_ctx = ctx_factory()

    # Callee declared without explicit shapes on its array arguments.
    twice_args = [lp.ValueArg("n"), lp.GlobalArg("a"), lp.GlobalArg("b")]
    twice_fn = lp.make_function(
        "{[i, j]: 0<=i, j < n}",
        """
            b[i, j] = 2*a[i, j]
            """,
        twice_args,
        name="twice",
    )

    # Callee whose array arguments use ``shape=lp.auto``.
    thrice_args = [
        lp.ValueArg("n"),
        lp.GlobalArg("a", shape=lp.auto),
        lp.GlobalArg("b", shape=lp.auto),
    ]
    thrice_fn = lp.make_function(
        "{[i, j]: 0<=i, j < n}",
        """
            b[i, j] = 3*a[i, j]
            """,
        thrice_args,
        name="thrice",
    )

    caller_args = [
        lp.ValueArg("N", dtype=np.int32),
        lp.GlobalArg("x", shape=lp.auto, dtype=np.float64),
        ...,
    ]
    t_unit = lp.make_kernel(
        "{[i0,i1,i2,i3,i4,i5,i6,i7]: 0<=i0, i1, i2, i3, i4, i5, i6, i7< N}",
        """
            [i0, i1]: y[i0, i1] = twice(N, [i2, i3]: x[2*i2, i3])
            [i4, i5]: z[i4, i5] = thrice(N, [i6, i7]: x[2*i6+1, i7])
            """,
        caller_args,
    )

    # Register the callees one at a time: ``twice`` first, then ``thrice``.
    for callee in (twice_fn, thrice_fn):
        t_unit = lp.merge([t_unit, callee])

    lp.auto_test_vs_ref(t_unit, cl_ctx, t_unit, parameters={"N": 4})
Example #2
0
def test_shape_translation_through_sub_array_ref(ctx_factory, inline):
    """Call callees through sub-array refs with non-trivial index expressions.

    Three callees are invoked with ``x1[i//2, i%2]``, ``x2[2*j+k]`` and
    ``x3[l, l]``. The outputs are compared on the host against ``2*x1``,
    ``3*x2`` and the diagonal of ``5*x3``. When ``inline`` is true, every
    callee is inlined before the kernel is run.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    # Device-side random inputs.
    x1 = cl.clrandom.rand(cq, (3, 2), dtype=np.float64)
    x2 = cl.clrandom.rand(cq, (6, ), dtype=np.float64)
    x3 = cl.clrandom.rand(cq, (6, 6), dtype=np.float64)

    callees = [
        lp.make_function(
            "{[i]: 0<=i<6}",
            """
            b[i] = 2*abs(a[i])
            """,
            name="callee_fn1"),
        lp.make_function(
            "{[i, j]: 0<=i<3 and 0 <= j < 2}",
            """
            b[i, j] = 3*a[i, j]
            """,
            name="callee_fn2"),
        lp.make_function(
            "{[i]: 0<=i<6}",
            """
            b[i] = 5*a[i]
            """,
            name="callee_fn3"),
    ]

    t_unit = lp.make_kernel(
        "{[i, j, k, l]:  0<= i < 6 and 0 <= j < 3 and 0 <= k < 2 and 0<=l<6}",
        """
            [i]: y1[i//2, i%2] = callee_fn1([i]: x1[i//2, i%2])
            [j, k]: y2[2*j+k] = callee_fn2([j, k]: x2[2*j+k])
            [l]: y3[l, l] = callee_fn3([l]: x3[l, l])
            """)

    for callee in callees:
        t_unit = lp.merge([t_unit, callee])

    if inline:
        for callee_name in ("callee_fn1", "callee_fn2", "callee_fn3"):
            t_unit = lp.inline_callable_kernel(t_unit, callee_name)

    for option in ("write_cl", "return_dict"):
        t_unit = lp.set_options(t_unit, option)

    _, outputs = t_unit(cq, x1=x1, x2=x2, x3=x3)

    # Copy the results back to the host before comparing.
    y1 = outputs["y1"].get()
    y2 = outputs["y2"].get()
    y3 = outputs["y3"].get()

    err_y1 = np.linalg.norm(y1 - 2 * x1.get())
    assert err_y1 < 1e-15
    err_y2 = np.linalg.norm(y2 - 3 * x2.get())
    assert err_y2 < 1e-15
    # Only the diagonal of y3 is written by the kernel, so compare only that.
    err_y3 = np.linalg.norm(np.diag(y3 - 5 * x3.get()))
    assert err_y3 < 1e-15
Example #3
0
def test_unused_hw_axes_in_callee(ctx_factory, inline):
    """Call a callee tagged ``l.1`` from a caller whose iname is tagged ``l.0``.

    The merged program (with ``twice`` inlined when ``inline`` is true) is
    passed to ``lp.auto_test_vs_ref`` as both test and reference kernel.
    """
    cl_ctx = ctx_factory()

    callee = lp.make_function(
        "{[i]: 0<=i<10}",
        """
            y[i] = 2*x[i]
            """,
        name="twice")

    caller_args = [
        lp.GlobalArg("x", shape=(10, 10), dtype=float),
        lp.GlobalArg("y", shape=(10, 10)),
    ]
    caller = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
            y[:, i] = twice(x[:, i])
            """,
        caller_args,
        name="outer")

    # Callee and caller are mapped to different local hardware axes.
    callee = lp.tag_inames(callee, {"i": "l.1"})
    caller = lp.tag_inames(caller, {"i": "l.0"})
    t_unit = lp.merge([caller, callee])

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "twice")

    lp.auto_test_vs_ref(t_unit, cl_ctx, t_unit)
Example #4
0
def test_check_bounds_with_caller_assumptions(ctx_factory):
    """Code generation raises ``LoopyIndexError`` until ``N <= 20`` is assumed.

    The caller passes its parameter ``N`` to ``arange`` while writing into a
    20-element ``Y``. After the assumption ``N <= 20`` is attached to the
    entrypoint, the program is run through ``lp.auto_test_vs_ref`` with
    ``N = 15``.
    """
    import islpy as isl
    from loopy.diagnostic import LoopyIndexError

    callee = lp.make_function(
        "{[i]: 0<=i<n}",
        """
        y[i] = i
        """,
        name="arange")

    caller_args = [
        lp.GlobalArg("Y", shape=(20, )),
        lp.ValueArg("N", dtype=np.int32),
    ]
    t_unit = lp.make_kernel(
        "{[i]: 0<=i<20}",
        """
        [i]: Y[i] = arange(N)
        """,
        caller_args,
        name="epoint")

    t_unit = lp.merge([t_unit, callee])

    # No bound on N has been declared yet, so code generation must fail.
    with pytest.raises(LoopyIndexError):
        lp.generate_code_v2(t_unit)

    bound_on_n = isl.BasicSet("[N] -> { : N <= 20}")
    constrained_entry = lp.assume(t_unit.default_entrypoint, bound_on_n)
    t_unit = t_unit.with_kernel(constrained_entry)

    lp.auto_test_vs_ref(t_unit, ctx_factory(), parameters={"N": 15})
Example #5
0
def test_double_hw_axes_used_in_knl_call(inline):
    """Expect ``LoopyError`` when caller and callee both tag ``i`` as ``l.0``.

    The error is expected from ``lp.generate_code_v2`` whether or not the
    callee has been inlined.
    """
    from loopy.diagnostic import LoopyError

    callee = lp.make_function(
        "{[i]: 0<=i<10}",
        """
            y[i] = 2*x[i]
            """,
        name="twice")

    caller_args = [
        lp.GlobalArg("x", shape=(10, 10), dtype=float),
        lp.GlobalArg("y", shape=(10, 10)),
    ]
    caller = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
            y[:, i] = twice(x[:, i])
            """,
        caller_args,
        name="outer")

    # Both kernels are given the same local hardware axis.
    shared_tag = {"i": "l.0"}
    callee = lp.tag_inames(callee, shared_tag)
    caller = lp.tag_inames(caller, shared_tag)
    t_unit = lp.merge([caller, callee])

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "twice")

    with pytest.raises(LoopyError):
        lp.generate_code_v2(t_unit)
Example #6
0
def test_inlining_with_indirections(ctx_factory):
    """Inline a callee that writes through an index array argument.

    The callee zeroes ``x`` and then sets ``x[map[j]]`` to one. With
    ``mymap = [0, 1, 2]`` the expected result is ``[1, 1, 1, 0, 0, 0]``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    callee = lp.make_function(
        "{[i, j]: 0<=i<6 and 0<=j<3}",
        """
            x[i] = 0.0f
            ...gbarrier
            x[map[j]] = 1.0f
            """,
        seq_dependencies=True,
        name="ones_and_zeros")

    caller_args = [
        lp.GlobalArg("y", shape=6, dtype=lp.auto),
        lp.GlobalArg("mymap", dtype=np.int32, shape=3),
    ]
    caller = lp.make_kernel(
        "{ : }",
        """
            y[:] = ones_and_zeros(mymap[:])
            """,
        caller_args)

    merged = lp.merge([caller, callee])
    inlined = lp.inline_callable_kernel(merged, "ones_and_zeros")

    index_map = np.arange(3).astype(np.int32)

    _, (result, ) = inlined(cq, mymap=index_map)

    reference = np.array([1, 1, 1, 0, 0, 0]).astype(np.float32)
    assert (reference == result).all()
Example #7
0
def test_array_inputs_to_callee_kernels(ctx_factory, inline):
    """Pass whole arrays by name to a callee and check ``z = 2*x + 3*y``.

    The callee is optionally inlined. The result is compared with the NumPy
    expression using a relative 2-norm error below ``1e-15``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)
    size = 2**3

    x = np.random.rand(size, size)
    y = np.random.rand(size, size)

    callee = lp.make_function(
        "{[i, j]:0<=i, j < 8}",
        """
            g[i, j] = 2*e[i, j] + 3*f[i, j]
            """,
        name="linear_combo")

    caller_data = [
        lp.GlobalArg(name="x, y, z", dtype=np.float64, shape=(size, size)),
        ...,
    ]
    caller = lp.make_kernel(
        "{:}",
        """
            z[:, :] = linear_combo(x, y)
            """,
        kernel_data=caller_data)

    t_unit = lp.merge([caller, callee])
    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "linear_combo")

    _, (result, ) = t_unit(cq, x=x, y=y)

    abs_err = np.linalg.norm(2 * x + 3 * y - result)
    ref_norm = np.linalg.norm(2 * x + 3 * y)
    assert abs_err / ref_norm < 1e-15
Example #8
0
def test_empty_sub_array_refs(ctx_factory, inline):
    """Call a callee through sub-array refs swept by a length-one iname.

    Background: https://github.com/OP2/PyOP2/pull/559#discussion_r272208618

    The caller computes ``z[i+k] = x[i+k] - y[i+k]`` through
    ``wence_function`` with ``0 <= k < 1``. The output is compared with
    ``x - y``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    x = np.random.randn(10)
    y = np.random.randn(10)

    callee = lp.make_function(
        "{[d]:0<=d<1}",
        """
            c[d] = a[d] - b[d]
            """,
        name="wence_function")

    caller_args = [
        lp.GlobalArg("x, y", dtype=np.float64, shape=(10, )),
        ...,
    ]
    t_unit = lp.make_kernel(
        "{[i,k]: 0<=i<10 and 0<=k<1}",
        """
            [k]:z[i+k] = wence_function([k]:x[i+k], [k]:y[i+k])
            """,
        caller_args)

    t_unit = lp.merge([t_unit, callee])

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "wence_function")

    _, (result, ) = t_unit(cq, x=x, y=y)
    assert np.allclose(result, x - y)
Example #9
0
def test_slices_with_negative_step(ctx_factory, inline):
    """Write a callee result through a slice with a negative step.

    The caller assigns ``linear_combo`` into ``z[i, 3:-1:-1, k, :, m]``, so
    the check compares ``2*x + 3*y`` with the output reversed along axis 1.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)
    size = 4
    full_shape = (size, size, size, size, size)

    x = np.random.rand(*full_shape)
    y = np.random.rand(*full_shape)

    callee = lp.make_function(
        "{[i, j]:0<=i, j < 4}",
        """
            g[i, j] = 2*e[i, j] + 3*f[i, j]
            """,
        name="linear_combo")

    caller_data = [
        lp.GlobalArg(name="x, y, z", dtype=np.float64, shape=full_shape),
        ...,
    ]
    caller = lp.make_kernel(
        "{[i, k, m]: 0<=i, k, m<4}",
        """
            z[i, 3:-1:-1, k, :, m] = linear_combo(x[i, :, k, :, m],
                                                   y[i, :, k, :, m])
            """,
        kernel_data=caller_data)

    t_unit = lp.merge([caller, callee])
    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "linear_combo")

    _, (result, ) = t_unit(cq, x=x, y=y)

    # Undo the reversed write along axis 1 before comparing.
    abs_err = np.linalg.norm(2 * x + 3 * y - result[:, ::-1, :, :, :])
    ref_norm = np.linalg.norm(2 * x + 3 * y)
    assert abs_err / ref_norm < 1e-15
Example #10
0
def test_callees_with_gbarriers_are_inlined_with_nested_calls(ctx_factory):
    """Run a two-level call chain whose innermost callee has a ``gbarrier``.

    The entrypoint calls ``dummy_ones_and_zeros``, which calls
    ``ones_and_zeros``. The expected output is ``[1, 1, 1, 0, 0, 0]``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    inner_callee = lp.make_function(
        "{[i, j]: 0<=i<6 and 0<=j<3}",
        """
            x[i] = 0.0f
            ...gbarrier
            x[j] = 1.0f
            """,
        seq_dependencies=True,
        name="ones_and_zeros")

    # Wrapper callee that only forwards to ``ones_and_zeros``.
    outer_callee = lp.make_function(
        "{[i]: 0<=i<6}",
        """
            [i]: y[i] = ones_and_zeros()
            """,
        name="dummy_ones_and_zeros")

    entry_args = [lp.GlobalArg("y", shape=6, dtype=lp.auto)]
    t_unit = lp.make_kernel(
        "{ : }",
        """
            y[:] = dummy_ones_and_zeros()
            """,
        entry_args)

    t_unit = lp.merge([t_unit, outer_callee, inner_callee])
    _, (result, ) = t_unit(cq)

    reference = np.array([1, 1, 1, 0, 0, 0]).astype(np.float32)

    assert (reference == result.get()).all()
Example #11
0
def test_packing_unpacking(ctx_factory, inline):
    """Apply ``pack_and_unpack_args_for_call`` to two callee invocations.

    After packing (and optional inlining) the outputs ``y1`` and ``y2`` are
    compared with ``2*x1`` and ``3*x2`` using a relative 2-norm error.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    x1 = cl.clrandom.rand(cq, (3, 2), dtype=np.float64)
    x2 = cl.clrandom.rand(cq, (6, ), dtype=np.float64)

    callee_names = ("callee_fn1", "callee_fn2")

    first_callee = lp.make_function(
        "{[i]: 0<=i<6}",
        """
            b[i] = 2*a[i]
            """,
        name="callee_fn1")

    second_callee = lp.make_function(
        "{[i, j]: 0<=i<2 and 0 <= j < 3}",
        """
            b[i, j] = 3*a[i, j]
            """,
        name="callee_fn2")

    t_unit = lp.make_kernel(
        "{[i, j, k]:  0<= i < 3 and 0 <= j < 2 and 0 <= k < 6}",
        """
            [i, j]: y1[i, j] = callee_fn1([i, j]: x1[i, j])
            [k]: y2[k] = callee_fn2([k]: x2[k])
            """)

    for callee in (first_callee, second_callee):
        t_unit = lp.merge([t_unit, callee])

    for callee_name in callee_names:
        t_unit = lp.pack_and_unpack_args_for_call(t_unit, callee_name)

    if inline:
        for callee_name in callee_names:
            t_unit = lp.inline_callable_kernel(t_unit, callee_name)

    for option in ("write_cl", "return_dict"):
        t_unit = lp.set_options(t_unit, option)

    _, outputs = t_unit(cq, x1=x1, x2=x2)

    y1 = outputs["y1"].get()
    y2 = outputs["y2"].get()

    rel_err_y1 = (np.linalg.norm(2 * x1.get() - y1)
                  / np.linalg.norm(2 * x1.get()))
    assert rel_err_y1 < 1e-15
    rel_err_y2 = (np.linalg.norm(3 * x2.get() - y2)
                  / np.linalg.norm(3 * x2.get()))
    assert rel_err_y2 < 1e-15
Example #12
0
def test_register_knl_with_hw_axes(ctx_factory, inline):
    """Call a callee with hardware-tagged inames from a hardware-tagged caller.

    The callee's ``i`` is split onto ``g.0``/``l.0`` and the caller's ``i``
    onto ``g.1``/``l.1``. The output ``z`` is compared with ``2*x + 3*y``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    size = 4
    full_shape = (size, size, size, size, size)

    x_dev = cl.clrandom.rand(cq, full_shape, np.float64)
    y_dev = cl.clrandom.rand(cq, full_shape, np.float64)

    callee = lp.make_function(
        "{[i, j]:0<=i, j < 4}",
        """
            g[i, j] = 2*e[i, j] + 3*f[i, j]
            """,
        name="linear_combo")
    callee = lp.split_iname(
        callee, "i", 1, inner_tag="l.0", outer_tag="g.0")

    caller = lp.make_kernel(
        "{[i, j, k, l, m]: 0<=i, j, k, l, m<4}",
        """
            [j, l]: z[i, j, k, l, m] = linear_combo([j, l]: x[i, j, k, l, m],
                                                     [j, l]: y[i, j, k, l, m])
            """,
        name="caller")
    caller = lp.split_iname(
        caller, "i", 4, inner_tag="l.1", outer_tag="g.1")

    t_unit = lp.merge([caller, callee])

    t_unit = lp.set_options(t_unit, "return_dict")

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "linear_combo")

    _, outputs = t_unit(cq, x=x_dev, y=y_dev)

    x_host = x_dev.get()
    y_host = y_dev.get()

    abs_err = np.linalg.norm(2 * x_host + 3 * y_host - outputs["z"].get())
    ref_norm = np.linalg.norm(2 * x_host + 3 * y_host)
    assert abs_err / ref_norm < 1e-15
Example #13
0
def test_multi_arg_array_call(ctx_factory):
    """Call a callee that returns two outputs (a minimum and its index).

    ``custom_argmin`` is assembled from explicit ``lp.Assignment`` objects
    and invoked on ``b``. The two outputs are compared with ``np.min(b)`` and
    ``np.argmin(b)``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)
    import pymbolic.primitives as p
    size = 10

    # Symbolic building blocks for the callee's instructions.
    acc_var = p.Variable("acc_i")
    loop_var = p.Variable("i")
    index_var = p.Variable("index")
    elem = p.Subscript(p.Variable("a"), p.Variable("i"))

    callee_insns = [
        lp.Assignment(id="init2", assignee=index_var, expression=0),
        lp.Assignment(id="init1", assignee=acc_var, expression="214748367"),
        lp.Assignment(
            id="insn",
            assignee=index_var,
            expression=p.If(p.Expression.eq(acc_var, elem), loop_var,
                            index_var),
            depends_on="update"),
        lp.Assignment(
            id="update",
            assignee=acc_var,
            expression=p.Variable("min")(acc_var, elem),
            depends_on="init1,init2"),
    ]
    callee_args = [
        lp.GlobalArg("a"),
        lp.GlobalArg(
            "acc_i, index", is_input=False, is_output=True, shape=lp.auto),
        ...,
    ]
    argmin_fn = lp.make_function(
        "{[i]: 0 <= i < n}", callee_insns, callee_args, name="custom_argmin")

    argmin_fn = lp.fix_parameters(argmin_fn, n=size)

    t_unit = lp.make_kernel(
        "{[i]:0<=i<n}",
        """
            []: min_val[()], []: min_index[()] = custom_argmin([i]:b[i])
            """)

    t_unit = lp.fix_parameters(t_unit, n=size)
    t_unit = lp.set_options(t_unit, return_dict=True)

    t_unit = lp.merge([t_unit, argmin_fn])
    b = np.random.randn(size)
    _, outputs = t_unit(cq, b=b)
    tolerance = 1e-15
    from numpy.linalg import norm
    min_val_err = norm(outputs["min_val"] - np.min(b))
    assert min_val_err < tolerance
    min_index_err = norm(outputs["min_index"] - np.argmin(b))
    assert min_index_err < tolerance
Example #14
0
def test_argument_matching_for_inplace_update(ctx_factory):
    """Call a callee that reads and writes the same array ``x``.

    The caller passes ``x[:]`` as both input and output of ``twice``. The
    result is compared with ``2 * x``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    callee = lp.make_function(
        "{[i]: 0<=i<10}",
        """
            x[i] = 2*x[i]
            """,
        name="twice")

    caller_args = [lp.GlobalArg("x", shape=(10, ), dtype=np.float64)]
    t_unit = lp.make_kernel(
        "{:}",
        """
            x[:] = twice(x[:])
            """,
        caller_args)

    t_unit = lp.merge([t_unit, callee])

    x = np.random.randn(10)
    # Hand the kernel a copy so that ``x`` keeps the original values.
    _, (result, ) = t_unit(cq, x=np.copy(x))

    assert np.allclose(2 * x, result)
Example #15
0
def test_non_zero_start_in_subarray_ref(ctx_factory):
    """Use a sub-array ref whose sweeping iname starts at ``-5``.

    The output ref is ``[i]: y[i+5]`` with ``-5 <= i < 5``. The result is
    compared with ``2 * x``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    callee = lp.make_function(
        "{[i]: 0<=i<10}",
        """
            b[i] = 2*a[i]
            """,
        name="twice")

    caller_args = [lp.GlobalArg("x, y", shape=(10, ), dtype=np.float64)]
    t_unit = lp.make_kernel(
        "{[i, j]: -5<=i<5 and 0<=j<10}",
        """
            [i]:y[i+5] = twice([j]: x[j])
            """,
        caller_args)

    t_unit = lp.merge([t_unit, callee])

    x = np.random.randn(10)
    _, (result, ) = t_unit(cq, x=np.copy(x))

    assert np.allclose(2 * x, result)
Example #16
0
def test_inlining_with_callee_domain_param(ctx_factory):
    """Inline a callee whose loop domain depends on a parameter ``n``.

    The caller passes the literal ``10`` for ``n``. Every element of the
    output is expected to equal 2.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    callee = lp.make_function(
        "{[i]: 0<=i<n}",
        """
            y[i] = 2.0
            """,
        name="fill2")

    t_unit = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
            [i]: res[i] = fill2(10)
            """)

    merged = lp.merge([t_unit, callee])
    inlined = lp.inline_callable_kernel(merged, "fill2")
    _, (result, ) = inlined(cq)

    assert (result == 2).all()
Example #17
0
def test_register_knl(ctx_factory, inline):
    """Run a three-level call chain and check ``z = 2*x + 3*y``.

    The entrypoint calls ``linear_combo2``, which calls ``linear_combo1``.
    With ``inline`` set, ``linear_combo2`` is inlined first and
    ``linear_combo1`` second.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)
    size = 4
    full_shape = (size, size, size, size, size)

    x = np.random.rand(*full_shape)
    y = np.random.rand(*full_shape)

    innermost = lp.make_function(
        "{[i, j]:0<= i, j< 4}",
        """
            c[i, j] = 2*a[i, j] + 3*b[i, j]
            """,
        name="linear_combo1")

    middle = lp.make_function(
        "{[i, j]:0<=i, j < 4}",
        """
            [i, j]: g[i, j] = linear_combo1([i, j]: e[i, j], [i, j]: f[i, j])
            """,
        name="linear_combo2")

    entry_data = [
        lp.GlobalArg(name="x, y", dtype=np.float64, shape=full_shape),
        ...,
    ]
    entry = lp.make_kernel(
        "{[i, j, k, l, m]: 0<=i, j, k, l, m<4}",
        """
            [j, l]: z[i, j, k, l, m] = linear_combo2([j, l]: x[i, j, k, l, m],
                                                     [j, l]: y[i, j, k, l, m])
            """,
        kernel_data=entry_data)

    t_unit = lp.merge([innermost, middle, entry])

    if inline:
        # Inline from the outermost callee inwards.
        for callee_name in ("linear_combo2", "linear_combo1"):
            t_unit = lp.inline_callable_kernel(t_unit, callee_name)

    _, (result, ) = t_unit(cq, x=x, y=y)

    abs_err = np.linalg.norm(2 * x + 3 * y - result)
    ref_norm = np.linalg.norm(2 * x + 3 * y)
    assert abs_err / ref_norm < 1e-15
Example #18
0
def test_passing_and_getting_scalar_in_clbl_knl(ctx_factory, inline):
    """Pass a scalar into a callee and receive a scalar result back.

    ``call_sin`` computes ``y = sin(x)`` on scalars. The caller stores the
    result in the zero-dimensional ``real_y``. When ``inline`` is true the
    callee is inlined before execution. The test only checks that the
    program builds and runs; the returned value is not asserted.

    :arg ctx_factory: fixture returning the OpenCL context to run on.
    :arg inline: whether to inline ``call_sin`` into the caller.
    """
    # Use the fixture-provided context, like the other tests in this file,
    # rather than ``cl.create_some_context()``, which may prompt
    # interactively or pick a different device than the one under test.
    ctx = ctx_factory()
    cq = cl.CommandQueue(ctx)

    call_sin = lp.make_function("{:}",
                                """
        y = sin(x)
        """,
                                name="call_sin")

    knl = lp.make_kernel(
        "{:}", """
        []: real_y[()] = call_sin(real_x)
        """)

    knl = lp.merge([knl, call_sin])
    knl = lp.set_options(knl, "write_cl")
    if inline:
        knl = lp.inline_callable_kernel(knl, "call_sin")

    evt, (out, ) = knl(cq, real_x=np.asarray(3.0, dtype=float))
Example #19
0
def test_valueargs_being_mapped_in_inling(ctx_factory):
    """Inline a callee whose value argument ``n`` appears in its expression.

    ``doublify`` computes ``y[i] = n*x[i]`` and is called with the literal
    ``10``. The inlined program is checked with ``lp.auto_test_vs_ref``
    against itself.
    """
    callee_args = [lp.ValueArg("n", dtype=np.int32), ...]
    callee = lp.make_function(
        "{[i]: 0<=i<n}",
        """
            y[i] = n*x[i]
            """,
        callee_args,
        name="doublify",
    )

    caller_args = [lp.GlobalArg("foo", dtype=float, shape=lp.auto), ...]
    caller = lp.make_kernel(
        "{[i, j]: 0<=i, j<10}",
        """
            [i]: bar[i] = doublify(10, [j]: foo[j])
            """,
        caller_args,
    )

    merged = lp.merge([caller, callee])
    inlined = lp.inline_callable_kernel(merged, "doublify")

    lp.auto_test_vs_ref(inlined, ctx_factory(), inlined)
Example #20
0
def test_simplify_indices(ctx_factory):
    """Check that ``lp.simplify_indices`` removes floor-divs after inlining.

    The test asserts that at least one instruction of the inlined kernel
    contains a floor division and that none does after simplification. It
    then compares the simplified kernel against the unsimplified one with
    ``lp.auto_test_vs_ref``.
    """

    class ContainsFloorDiv(lp.symbolic.CombineMapper):
        """Mapper returning True iff an expression contains a floor-div."""

        def combine(self, values):
            return any(values)

        def map_floor_div(self, expr):
            return True

        def map_variable(self, expr):
            return False

        def map_constant(self, expr):
            return False

    cl_ctx = ctx_factory()

    callee = lp.make_function(
        "{[i, j]: 0<=i<10 and 0<=j<4}",
        """
        y[i,j] = 2*x[i,j]
        """,
        name="zerozerozeroonezeroify")

    caller_args = [lp.GlobalArg("X,Y", shape=(10, 4), dtype=np.float64)]
    t_unit = lp.make_kernel(
        "{:}",
        """
        Y[:,:] = zerozerozeroonezeroify(X[:,:])
        """,
        caller_args)

    t_unit = lp.merge([t_unit, callee])
    t_unit = lp.inline_callable_kernel(t_unit, "zerozerozeroonezeroify")
    simplified = lp.simplify_indices(t_unit)
    contains_floordiv = ContainsFloorDiv()

    def floordiv_flags(program):
        # Lazily yield one flag per assignment-like instruction.
        return (
            contains_floordiv(insn.expression)
            for insn in program.default_entrypoint.instructions
            if isinstance(insn, lp.MultiAssignmentBase))

    assert any(floordiv_flags(t_unit))
    assert all(not flag for flag in floordiv_flags(simplified))

    lp.auto_test_vs_ref(t_unit, cl_ctx, simplified)
Example #21
0
def test_non1_step_slices(ctx_factory, start, inline):
    """Assign callee results into slices with steps of 3 and -3.

    See https://github.com/inducer/loopy/pull/222#discussion_r645905188

    ``X`` and ``Y`` are first filled with 42 and 1729. ``squared_arange``
    then overwrites ``X[start:40:3]`` and ``Y[39:start:-3]``. The results are
    compared with the same slice assignments done in NumPy.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    callee_args = [lp.ValueArg("n"), ...]
    callee = lp.make_function(
        "{[i]: 0<=i<n}",
        """
            y[i] = i**2
            """,
        callee_args,
        name="squared_arange")

    caller_args = [lp.GlobalArg("X,Y", shape=40)]
    t_unit = lp.make_kernel(
        "{[i_init, j_init]: 0<=i_init, j_init<40}",
        f"""
            X[i_init] = 42
            X[{start}:40:3] = squared_arange({len(range(start, 40, 3))})

            Y[j_init] = 1729
            Y[39:{start}:-3] = squared_arange({len(range(39, start, -3))})
            """,
        caller_args,
        seq_dependencies=True)

    # NumPy references built with the same slices as the kernel.
    forward_ref = 42 * np.ones(40, dtype=np.int64)
    forward_ref[start:40:3] = np.arange(len(range(start, 40, 3)))**2

    backward_ref = 1729 * np.ones(40, dtype=np.int64)
    backward_ref[39:start:-3] = np.arange(len(range(39, start, -3)))**2

    t_unit = lp.merge([t_unit, callee])

    t_unit = lp.set_options(t_unit, "return_dict")

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "squared_arange")

    _, outputs = t_unit(cq)

    np.testing.assert_allclose(outputs["X"].get(), forward_ref)
    np.testing.assert_allclose(outputs["Y"].get(), backward_ref)
Example #22
0
def test_kc_with_floor_div_in_expr(ctx_factory, inline):
    """Use ``//`` and ``%`` in the index of an output sub-array ref.

    See https://github.com/inducer/loopy/issues/366

    The merged program (optionally with the callee inlined) is passed to
    ``lp.auto_test_vs_ref`` as both test and reference kernel.
    """
    import loopy as lp

    cl_ctx = ctx_factory()

    callee = lp.make_function(
        "{[i]: 0<=i<10}",
        """
            x[i] = 2*x[i]
            """,
        name="callee_with_update")

    t_unit = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
            [i]: x[2*(i//2) + (i%2)] = callee_with_update([i]: x[i])
            """)

    t_unit = lp.merge([t_unit, callee])

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "callee_with_update")

    lp.auto_test_vs_ref(t_unit, cl_ctx, t_unit)
Example #23
0
def test_callee_with_auto_offset(ctx_factory):
    """Call a callee with ``offset=lp.auto`` on an array declared ``offset=3``.

    The test expects the first three entries of ``y`` to stay unchanged and
    the remaining seven to be doubled in place.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    callee_args = [lp.GlobalArg("y", offset=lp.auto)]
    callee = lp.make_function(
        "{[i]: 0<=i<7}",
        """
        y[i] = 2*y[i]
        """,
        callee_args,
        name="dosify")

    caller_args = [lp.GlobalArg("y", offset=3, shape=10)]
    t_unit = lp.make_kernel(
        "{[i]: 0<=i<7}",
        """
        [i]: y[i] = dosify([i]: y[i])
        """,
        caller_args)

    t_unit = lp.merge([t_unit, callee])

    y = np.arange(10)
    t_unit(cq, y=y)

    untouched, doubled = y[:3], y[3:]
    np.testing.assert_allclose(untouched, np.arange(3))
    np.testing.assert_allclose(doubled, 2 * np.arange(3, 10))
Example #24
0
def test_unknown_stride_to_callee(ctx_factory):
    """Pass a sub-array ref of an ``lp.auto``-shaped 3-D array to a callee.

    ``twice`` is called on ``x[2*i2, i3, i]`` for each ``i``. The program is
    checked with ``lp.auto_test_vs_ref`` against itself, with ``N = 4`` and
    ``Nvar = 5``.
    """
    cl_ctx = ctx_factory()

    callee_args = [lp.ValueArg("n"), lp.GlobalArg("a"), lp.GlobalArg("b")]
    callee = lp.make_function(
        "{[i, j]: 0<=i, j < n}",
        """
            b[i, j] = 2*a[i, j]
            """,
        callee_args,
        name="twice")

    caller_args = [
        lp.ValueArg("N", dtype=np.int32),
        lp.ValueArg("Nvar", dtype=np.int32),
        lp.GlobalArg("x", shape=lp.auto, dtype=np.float64),
        ...,
    ]
    t_unit = lp.make_kernel(
        "{[i,i0,i1,i2,i3]: 0<=i0, i1, i2, i3< N and 0<=i<Nvar}",
        """
            [i0, i1]: y[i0, i1, i] = twice(N, [i2, i3]: x[2*i2, i3, i])
            """,
        caller_args)

    t_unit = lp.merge([t_unit, callee])

    sizes = {"N": 4, "Nvar": 5}
    lp.auto_test_vs_ref(t_unit, cl_ctx, t_unit, parameters=sizes)
Example #25
0
def test_callee_with_parameter_and_grid(ctx_factory):
    """Run a parametrized callee whose iname is split onto ``g.0``.

    ``split_iname`` is restricted with ``within="in_kernel:arange"``. The
    output is compared with ``np.arange(10)``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    callee = lp.make_function(
        "{[i]: 0<=i<n}",
        """
        y[i] = i
        """,
        name="arange")

    caller = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
        [i]: y[i] = arange(10)
        """)

    # The callee is listed first in the merge.
    t_unit = lp.merge([callee, caller])
    t_unit = lp.split_iname(
        t_unit, "i", 2, outer_tag="g.0", within="in_kernel:arange")

    _, (result, ) = t_unit(cq)
    np.testing.assert_allclose(result.get(), np.arange(10))
Example #26
0
def test_incomplete_entrypoint_raises_type_inf_failure():
    """Expect ``LoopyError`` when a merged-in kernel is itself an entrypoint.

    ``dosify`` is created with ``lp.make_kernel`` rather than
    ``lp.make_function``, and no argument types are supplied for it.
    """
    from loopy.diagnostic import LoopyError

    doubler = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
            y[i] = 2*x[i]
            """,
        name="dosify")

    caller_args = [lp.GlobalArg("x,y", shape=(10, ))]
    caller = lp.make_kernel(
        "{:}",
        """
            y[:] = dosify(x[:])
            y[:] = dosify(y[:])
            """,
        caller_args,
        name="cuatroify",
        seq_dependencies=True)

    t_unit = lp.merge([caller, doubler])

    with pytest.raises(LoopyError):
        # 'dosify' is also registered as an entrypoint, but the provided
        # args aren't enough to infer its types
        lp.generate_code_v2(t_unit)
Example #27
0
    def map_loopy_call(self, expr: LoopyCall, state: CodeGenState) -> None:
        """Append a call to ``expr.entrypoint`` to the kernel held in *state*.

        The callee's translation unit is merged into ``state.program``. The
        callee's arguments are then visited in declaration order to build
        the assignees (array outputs) and parameters (array inputs and value
        arguments) of a single call instruction. Temporaries created along
        the way, the new instruction, and the iname domains of the generated
        sub-array references are added to ``state.kernel``.

        :arg expr: the call node; its ``translation_unit``, ``entrypoint``
            and ``bindings`` are read, and it is subscripted by output
            argument name.
        :arg state: code generation state, updated in place via
            ``update_program``, ``update_kernel`` and ``results``.
        :returns: *None*; all effects are recorded on *state*.
        """
        from loopy.kernel.instruction import make_assignment
        from loopy.symbolic import SubArrayRef

        callee_kernel = expr.translation_unit[expr.entrypoint]

        # make the callee available to the program being generated
        state.update_program(lp.merge([state.program, expr.translation_unit]))

        # domains of the inames introduced by the sub-array references below
        domains = []

        def _get_sub_array_ref(array: Array,
                               name: str) -> "lp.symbolic.SubArrayRef":
            # Build a reference to the variable *name* that sweeps one fresh
            # iname per axis of *array*; the inames' domain is appended to
            # the enclosing ``domains`` list as a side effect.
            inames = tuple(
                state.var_name_gen(f"_{name}_dim{d}")
                for d in range(array.ndim))

            domains.append(
                domain_for_shape(
                    inames, shape_to_scalar_expression(array.shape, self,
                                                       state), {}))

            inames_as_vars = tuple(var(iname) for iname in inames)
            return SubArrayRef(inames_as_vars,
                               prim.Subscript(var(name), inames_as_vars))

        assignees = []
        params = []
        # ids of the instructions the call instruction must wait for
        depends_on: Set[str] = set()
        # temporaries to be added to the kernel, keyed by name
        new_tvs = {}
        new_insn_id = state.insn_id_gen(f"call_{callee_kernel.name}")

        for arg in callee_kernel.args:
            # must traverse in the order of callee's args to generate the correct
            # assignees order
            if isinstance(arg, lp.ArrayArg):
                if arg.is_output:
                    # array output: becomes an assignee backed by a fresh
                    # temporary
                    assignee_name = _generate_name_for_temp(
                        expr[arg.name], state)
                    assignees.append(
                        _get_sub_array_ref(expr[arg.name], assignee_name))

                    named_array = expr[arg.name]

                    # stored result for the assignee
                    result = StoredResult(assignee_name, named_array.ndim,
                                          frozenset([new_insn_id]))
                    # record the result for the corresponding loopy array
                    state.results[named_array] = result

                    new_tvs[assignee_name] = get_loopy_temporary(
                        assignee_name, named_array, self, state)
                else:
                    # array input: must be passed by name, so it is either
                    # already stored or gets stored into a temporary first
                    assert arg.is_input
                    pt_arg = expr.bindings[arg.name]
                    assert isinstance(pt_arg, Array)

                    pt_arg_rec = self.rec(pt_arg, state)

                    if isinstance(pt_arg_rec, StoredResult):
                        # found a stored result corresponding to the argument, use it
                        name = pt_arg_rec.name
                        params.append(_get_sub_array_ref(pt_arg, name))
                        depends_on.update(pt_arg_rec.depends_on)
                    else:
                        # did not find a stored result for the sub-expression, store
                        # it and then pass it to the call
                        name = _generate_name_for_temp(pt_arg, state)
                        store_insn_id = add_store(name,
                                                  pt_arg,
                                                  pt_arg_rec,
                                                  state,
                                                  output_to_temporary=True,
                                                  cgen_mapper=self)
                        depends_on.add(store_insn_id)
                        # replace "arg" with the created stored variable
                        state.results[pt_arg] = StoredResult(
                            name, pt_arg.ndim, frozenset([store_insn_id]))
                        params.append(_get_sub_array_ref(pt_arg, name))
                        new_tvs[name] = get_loopy_temporary(
                            name, pt_arg, self, state)
            else:
                # value argument: passed as a scalar expression
                assert isinstance(arg, lp.ValueArg) and arg.is_input
                pt_arg = expr.bindings[arg.name]
                prstnt_ctx = PersistentExpressionContext(state)

                if isinstance(pt_arg, Array):
                    # zero-dimensional array bound to a value argument
                    assert pt_arg.ndim == 0
                    pt_arg_rec = self.rec(pt_arg, state)
                    params.append(
                        pt_arg_rec.to_loopy_expression((), prstnt_ctx))
                    depends_on.update(pt_arg_rec.depends_on)
                else:
                    # non-array binding: translated by the expression mapper
                    local_ctx = LocalExpressionContext(reduction_bounds={},
                                                       num_indices=0,
                                                       local_namespace={})
                    params.append(
                        self.exprgen_mapper(pt_arg, prstnt_ctx, local_ctx))

        # the call itself: (assignees...) = entrypoint(params...)
        new_insn = make_assignment(tuple(assignees),
                                   var(expr.entrypoint)(*params),
                                   depends_on=frozenset(depends_on),
                                   id=new_insn_id)

        # update kernel
        kernel = state.kernel
        tvs = state.kernel.temporary_variables.copy()
        tvs.update(new_tvs)

        kernel = kernel.copy(instructions=kernel.instructions + [new_insn],
                             temporary_variables=tvs,
                             domains=kernel.domains + domains)

        state.update_kernel(kernel)