Example #1
def test_codegen_with_DictOfNamedArrays(ctx_factory):  # noqa
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    namespace = pt.Namespace()
    x = Placeholder(namespace, "x", (5, ), np.int64)
    y = Placeholder(namespace, "y", (5, ), np.int64)
    x_in = np.array([1, 2, 3, 4, 5])
    y_in = np.array([6, 7, 8, 9, 10])

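    # Bundle both arrays into a single computation with named outputs.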
    result = pt.DictOfNamedArrays(dict(x_out=x, y_out=y))

    # Without return_dict.
    prog = pt.generate_loopy(result, target=pt.PyOpenCLTarget(queue))
    _, (x_out, y_out) = prog(x=x_in, y=y_in)
    assert (x_out == x_in).all()
    assert (y_out == y_in).all()

    # With return_dict.
    prog = pt.generate_loopy(result,
                             target=pt.PyOpenCLTarget(queue),
                             options=lp.Options(return_dict=True))

    _, outputs = prog(x=x_in, y=y_in)
    assert (outputs["x_out"] == x_in).all()
    assert (outputs["y_out"] == y_in).all()
Example #2
def test_dict_of_named_array_codegen_avoids_recomputation():
    ns = pt.Namespace()
    x = pt.make_placeholder(ns, shape=(10, 4), dtype=float, name="x")
    y = 2 * x
    z = y + 4 * x

    yz = pt.DictOfNamedArrays({"y": y, "z": z})

    knl = pt.generate_loopy(yz).program
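    # The instruction that stores "z" should read the already-computed "y"
    # rather than recomputing 2*x.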
    assert ("y" in knl.id_to_insn["z_store"].read_dependency_names())
Example #3
def main():
    rank = comm.Get_rank()
    size = comm.Get_size()
    rng = np.random.default_rng()

    x_in = rng.integers(100, size=(4, 4))
    x = pt.make_data_wrapper(x_in)

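    # Any hashable value can serve as a symbolic communication tag;
    # number_distributed_tags below maps it to a numeric MPI tag.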
    mytag = (main, "x")
    halo = staple_distributed_send(x,
                                   dest_rank=(rank - 1) % size,
                                   comm_tag=mytag,
                                   stapled_to=make_distributed_recv(
                                       src_rank=(rank + 1) % size,
                                       comm_tag=mytag,
                                       shape=(4, 4),
                                       dtype=int))

    y = x + halo

    # Find the partition
    outputs = pt.DictOfNamedArrays({"out": y})
    distributed_parts = find_distributed_partition(outputs)
    distributed_parts, _ = number_distributed_tags(comm,
                                                   distributed_parts,
                                                   base_tag=42)
    prg_per_partition = generate_code_for_partition(distributed_parts)

    if 0:
        from pytato.visualization import show_dot_graph
        show_dot_graph(distributed_parts)

    # Sanity check
    from pytato.visualization import get_dot_graph_from_partition
    get_dot_graph_from_partition(distributed_parts)

    # Execute the distributed partition
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    context = execute_distributed_partition(distributed_parts,
                                            prg_per_partition, queue, comm)

    final_res = context["out"].get(queue)

    ref_res = comm.bcast(final_res)

    np.testing.assert_allclose(ref_res, final_res)

    if rank == 0:
        print("Distributed test succeeded.")
Example #4
def _do_test_distributed_execution_basic(ctx_factory):
    from mpi4py import MPI  # pylint: disable=import-error
    comm = MPI.COMM_WORLD

    rank = comm.Get_rank()
    size = comm.Get_size()

    rng = np.random.default_rng(seed=27)

    x_in = rng.integers(100, size=(4, 4))
    x = pt.make_data_wrapper(x_in)

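    # Ring exchange: send this rank's data to the previous rank and
    # receive the next rank's data.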
    halo = staple_distributed_send(x,
                                   dest_rank=(rank - 1) % size,
                                   comm_tag=42,
                                   stapled_to=make_distributed_recv(
                                       src_rank=(rank + 1) % size,
                                       comm_tag=42,
                                       shape=(4, 4),
                                       dtype=int))

    y = x + halo

    # Find the partition
    outputs = pt.DictOfNamedArrays({"out": y})
    distributed_parts = find_distributed_partition(outputs)
    prg_per_partition = generate_code_for_partition(distributed_parts)

    # Execute the distributed partition
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    context = execute_distributed_partition(distributed_parts,
                                            prg_per_partition, queue, comm)

    final_res = context["out"].get(queue)

    # All ranks generate the same random numbers (same seed).
    np.testing.assert_allclose(x_in * 2, final_res)
Example #5
def main():
    x_in = np.random.randn(2, 2)
    x = pt.make_data_wrapper(x_in)
    y = pt.stack([x @ x.T, 2 * x, 42 + x])
    y = y + 55

    tm = TopoSortMapper()
    tm(y)

    from functools import partial
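    # get_partition_id (defined elsewhere in this example, not shown here)
    # assigns each node to a partition based on the topological order above.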
    pfunc = partial(get_partition_id, tm.topological_order)

    # Find the partitions
    outputs = pt.DictOfNamedArrays({"out": y})
    partition = find_partition(outputs, pfunc)

    # Show the partitions
    from pytato.visualization import get_dot_graph_from_partition
    get_dot_graph_from_partition(partition)

    # Execute the partitions
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    prg_per_partition = generate_code_for_partition(partition)

    context = execute_partition(partition, prg_per_partition, queue)

    final_res = [context[k] for k in outputs.keys()]

    # Execute the unpartitioned code for comparison
    prg = pt.generate_loopy(y)
    _, (out, ) = prg(queue)

    np.testing.assert_allclose([out], final_res)

    print("Partitioning test succeeded.")
Example #6
import numpy as np
import pytato as pt

n = pt.make_size_param("n")
a = pt.make_placeholder(name="a", shape=(n, n), dtype=np.float64)

a2a = a @ (2 * a)
aat = a @ a.T
result = pt.DictOfNamedArrays({"a2a": a2a, "aat": aat})

# {{{ execute

import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
prg = pt.generate_loopy(result, cl_device=queue.device)
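# Rebind `a` to a concrete NumPy array; it is passed below for the
# placeholder of the same name.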
a = np.random.randn(20, 20)
_, out = prg(queue, a=a)
assert np.allclose(out["a2a"], a @ (2 * a))
assert np.allclose(out["aat"], a @ a.T)

# }}}

# {{{ generate OpenCL code

prg = pt.generate_loopy(result)

import loopy as lp

print(lp.generate_code_v2(prg.program).device_code())

# }}}
Example #7
def _do_test_distributed_execution_random_dag(ctx_factory):
    from mpi4py import MPI  # pylint: disable=import-error
    comm = MPI.COMM_WORLD

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    rank = comm.Get_rank()
    size = comm.Get_size()

    from testlib import RandomDAGContext, make_random_dag

    axis_len = 4
    comm_fake_prob = 500

    gen_comm_called = False

    ntests = 10
    for i in range(ntests):
        seed = 120 + i
        print(f"Step {i} {seed}")

        # {{{ compute value with communication

        comm_tag = 17

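        # gen_comm is registered below as an additional generator (weighted by
        # comm_fake_prob): it wraps a random subexpression in a ring
        # send/receive pair with a fresh tag.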
        def gen_comm(rdagc):
            nonlocal gen_comm_called
            gen_comm_called = True

            nonlocal comm_tag
            comm_tag += 1
            tag = (comm_tag, _RandomDAGTag)

            inner = make_random_dag(rdagc)
            return staple_distributed_send(inner,
                                           dest_rank=(rank - 1) % size,
                                           comm_tag=tag,
                                           stapled_to=make_distributed_recv(
                                               src_rank=(rank + 1) % size,
                                               comm_tag=tag,
                                               shape=inner.shape,
                                               dtype=inner.dtype))

        rdagc_comm = RandomDAGContext(np.random.default_rng(seed=seed),
                                      axis_len=axis_len,
                                      use_numpy=False,
                                      additional_generators=[(comm_fake_prob,
                                                              gen_comm)])
        x_comm = make_random_dag(rdagc_comm)

        distributed_partition = find_distributed_partition(
            pt.DictOfNamedArrays({"result": x_comm}))

        # Transform symbolic tags into numeric ones for MPI
        distributed_partition, _new_mpi_base_tag = number_distributed_tags(
            comm, distributed_partition, base_tag=comm_tag)

        prg_per_partition = generate_code_for_partition(distributed_partition)

        context = execute_distributed_partition(distributed_partition,
                                                prg_per_partition, queue, comm)

        res_comm = context["result"]

        # }}}

        # {{{ compute ref value without communication

        # compiled evaluation (i.e. use_numpy=False) fails for some of these
        # graphs, cf. https://github.com/inducer/pytato/pull/255
        rdagc_no_comm = RandomDAGContext(
            np.random.default_rng(seed=seed),
            axis_len=axis_len,
            use_numpy=True,
            additional_generators=[(comm_fake_prob,
                                    lambda rdagc: make_random_dag(rdagc))])
        res_no_comm_numpy = make_random_dag(rdagc_no_comm)

        # }}}

        if not isinstance(res_comm, np.ndarray):
            res_comm = res_comm.get(queue=queue)

        np.testing.assert_allclose(res_comm, res_no_comm_numpy)

    assert gen_comm_called