def test_deduplicate_data_wrappers():
    """Check that deduplicate_data_wrappers merges wrappers sharing memory."""
    from pytato.transform import CachedWalkMapper, deduplicate_data_wrappers

    class DataWrapperCounter(CachedWalkMapper):
        """Walk mapper that tallies every DataWrapper node it visits."""

        def __init__(self):
            self.count = 0
            super().__init__()

        def map_data_wrapper(self, expr):
            self.count += 1
            return super().map_data_wrapper(expr)

    def count_data_wrappers(expr):
        counter = DataWrapperCounter()
        counter(expr)
        return counter.count

    a = pt.make_data_wrapper(np.arange(27))
    b = pt.make_data_wrapper(np.arange(27))
    c = pt.make_data_wrapper(a.data.view())
    d = pt.make_data_wrapper(np.arange(1, 28))

    res = a + b + c + d

    # Before deduplication all four wrappers are distinct nodes.
    assert count_data_wrappers(res) == 4

    # c wraps a view of a's data, so a and c collapse into one wrapper.
    dd_res = deduplicate_data_wrappers(res)
    assert count_data_wrappers(dd_res) == 3
def test_unary_arith(ctx_factory, which):
    """Apply the unary operator *which* across ARITH_DTYPES; compare to numpy."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    op = getattr(operator, which)
    x_orig = np.array([1, 2, 3, 4, 5])

    namespace = pt.Namespace()
    exprs = {
        dtype: op(pt.make_data_wrapper(namespace, x_orig.astype(dtype)))
        for dtype in ARITH_DTYPES}

    prog = pt.generate_loopy(
        pt.make_dict_of_named_arrays(exprs),
        target=pt.PyOpenCLTarget(queue),
        options=lp.Options(return_dict=True))
    _, outputs = prog()

    for dtype in ARITH_DTYPES:
        expected = op(x_orig.astype(dtype))
        actual = outputs[dtype]
        assert actual.dtype == expected.dtype
        assert np.array_equal(actual, expected)
def call_loopy(self, program, **kwargs):
    """Eagerly execute the loopy *program* on ``self.queue``.

    :arg program: a loopy program whose options must have ``return_dict``
        and ``no_numpy`` set.
    :arg kwargs: maps argument names to :class:`pytato.array.DataWrapper`
        instances or numbers. Placeholders are not supported.
    :returns: a dict mapping result names to data wrappers in ``self.ns``.
    :raises ValueError: if the program options are not set as required.
    :raises NotImplementedError: for unsupported argument types.
    """
    # FIXME: always happens eagerly
    import pytato as pt
    from numbers import Number

    prg_kwargs = {}

    assert not any(
        isinstance(arg, pt.Placeholder) for arg in kwargs.values())

    for arg_name, arg in kwargs.items():
        if isinstance(arg, pt.array.DataWrapper):
            # Unwrap to the underlying device array.
            prg_kwargs[arg_name] = arg.data
        elif isinstance(arg, Number):
            prg_kwargs[arg_name] = arg
        else:
            # Was a bare NotImplementedError; include a hint for the caller.
            raise NotImplementedError(
                f"unsupported argument type for '{arg_name}': "
                f"{type(arg).__name__}")

    options = program.options
    if not (options.return_dict and options.no_numpy):
        raise ValueError(
            "Loopy program passed to call_loopy must "
            "have return_dict and no_numpy options set. "
            "Did you use meshmode.array_context.make_loopy_program "
            "to create this program?")

    evt, result = program(self.queue, **prg_kwargs)

    # Re-wrap every result so downstream pytato expressions can use it.
    return {
        res_name: pt.make_data_wrapper(self.ns, res)
        for res_name, res in result.items()}
def test_scalar_array_binary_arith(ctx_factory, which, reverse):
    """Scalar-vs-array binary ops across dtype combinations match numpy."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    op = getattr(operator, which)
    if reverse:
        op = reverse_args(op)

    x_orig = 7
    y_orig = np.array([1, 2, 3, 4, 5])

    for first_dtype in (int, float, complex):
        namespace = pt.Namespace()
        x_in = first_dtype(x_orig)

        exprs = {
            dtype: op(x_in, pt.make_data_wrapper(
                namespace, y_orig.astype(dtype), name=f"y{dtype}"))
            for dtype in ARITH_DTYPES}

        prog = pt.generate_loopy(
            pt.make_dict_of_named_arrays(exprs),
            target=pt.PyOpenCLTarget(queue),
            options=lp.Options(return_dict=True))
        _, outputs = prog()

        for dtype in exprs:
            out = outputs[dtype]
            out_ref = op(x_in, y_orig.astype(dtype))

            assert out.dtype == out_ref.dtype, (out.dtype, out_ref.dtype)
            # In some cases ops are done in float32 in loopy but float64 in numpy.
            assert np.allclose(out, out_ref), (out, out_ref)
def test_slice(ctx_factory, shape):
    """Check that pytato slicing matches numpy slicing for generated slices."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    from numpy.random import default_rng
    rng = default_rng()
    x_in = rng.random(size=shape)

    namespace = pt.Namespace()
    x = pt.make_data_wrapper(namespace, x_in)

    exprs = {}
    ref_outputs = {}
    # enumerate replaces the original manual "i = 0 / i += 1" counter.
    for i, slice_ in enumerate(generate_test_slices(shape)):
        exprs[f"out_{i}"] = x[slice_]
        ref_outputs[f"out_{i}"] = x_in[slice_]

    prog = pt.generate_loopy(
        pt.make_dict_of_named_arrays(exprs),
        target=pt.PyOpenCLTarget(queue),
        options=lp.Options(return_dict=True))
    _, outputs = prog()

    # Iterate name/value pairs directly instead of re-indexing the dict.
    for name, x_out in outputs.items():
        assert (x_out == ref_outputs[name]).all()
def test_concatenate(ctx_factory):
    """pt.concatenate along axis 1 agrees with numpy for three operands."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    from numpy.random import default_rng
    rng = default_rng()

    # Three arrays that differ only in the concatenation axis (axis 1).
    inputs = [rng.random(size=(3, extent, 3)) for extent in (9, 11, 22)]

    namespace = pt.Namespace()
    arrays = tuple(pt.make_data_wrapper(namespace, arr) for arr in inputs)

    assert_allclose_to_numpy(pt.concatenate(arrays, axis=1), queue)
def make_random_constant(rdagc: RandomDAGContext, naxes: int) -> Any:
    """Draw a random array with *naxes* axes of length ``rdagc.axis_len``."""
    shape = (rdagc.axis_len,) * naxes
    data = rdagc.rng.uniform(1e-3, 1, size=shape)

    # In numpy mode hand back the raw array; otherwise wrap it for pytato.
    return data if rdagc.use_numpy else pt.make_data_wrapper(data)
def test_stack(ctx_factory, input_dims):
    """pt.stack of two equal-shaped arrays matches numpy for every axis."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    shape = (2, 2, 2)[:input_dims]

    from numpy.random import default_rng
    rng = default_rng()
    x_in = rng.random(size=shape)
    y_in = rng.random(size=shape)

    namespace = pt.Namespace()
    x = pt.make_data_wrapper(namespace, x_in)
    y = pt.make_data_wrapper(namespace, y_in)

    # Stacking adds one axis, so there are input_dims + 1 valid positions.
    for axis in range(input_dims + 1):
        assert_allclose_to_numpy(pt.stack((x, y), axis=axis), queue)
def freeze(self, array):
    """Evaluate *array* and return it wrapped as a DataWrapper in ``self.ns``."""
    import pytato as pt
    import pyopencl.array as cla

    # A placeholder carries no data to evaluate; return an empty buffer
    # of the right shape and dtype instead.
    if isinstance(array, pt.Placeholder):
        empty = cla.empty(self.queue, shape=array.shape, dtype=array.dtype)
        return pt.make_data_wrapper(self.ns, empty)

    prg = pt.generate_loopy(array).program

    # Gather the data of every namespace-resident DataWrapper the
    # generated program expects as an argument.
    prog_kwargs = {}
    for arg_name in prg.arg_dict:
        if (arg_name in self.ns
                and isinstance(self.ns[arg_name], pt.array.DataWrapper)):
            prog_kwargs[arg_name] = self.ns[arg_name].data

    evt, (cl_array,) = prg(self.queue, **prog_kwargs)
    return pt.make_data_wrapper(self.ns, cl_array)
def test_matmul(ctx_factory, x1_ndim, x2_ndim):
    """Matrix products of 1-D/2-D operands match numpy's @ operator."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    def get_array(ndim):
        # Start from a fixed 2x2 matrix and index away leading axes until
        # only *ndim* dimensions remain.
        base = np.array([[1, 2], [3, 4]])
        return base[(0,) * (base.ndim - ndim)]

    x1_in = get_array(x1_ndim)
    x2_in = get_array(x2_ndim)

    namespace = pt.Namespace()
    x1 = pt.make_data_wrapper(namespace, x1_in)
    x2 = pt.make_data_wrapper(namespace, x2_in)

    prog = pt.generate_loopy(x1 @ x2, target=pt.PyOpenCLTarget(queue))
    _, (out,) = prog()

    assert (out == x1_in @ x2_in).all()
def test_data_wrapper(ctx_factory):
    """A DataWrapper round-trips its data, with or without name/shape."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    def run(expr):
        # Compile *expr* for the queue and return its single output.
        prog = pt.generate_loopy(expr, target=pt.PyOpenCLTarget(queue))
        _, (x_out,) = prog()
        return x_out

    # Without name/shape
    namespace = pt.Namespace()
    x_in = np.array([1, 2, 3, 4, 5])
    x = pt.make_data_wrapper(namespace, x_in)
    assert (run(x) == x_in).all()

    # With name/shape
    namespace = pt.Namespace()
    x_in = np.array([[1, 2], [3, 4], [5, 6]])
    pt.make_size_param(namespace, "n")
    x = pt.make_data_wrapper(namespace, x_in, name="x", shape="(n, 2)")
    assert (run(x) == x_in).all()
def test_reshape(ctx_factory, oldshape, newshape):
    """pt.reshape produces the same values as numpy's reshape."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    from numpy.random import default_rng
    x_in = default_rng().random(size=oldshape)

    namespace = pt.Namespace()
    x = pt.make_data_wrapper(namespace, x_in)

    assert_allclose_to_numpy(pt.reshape(x, newshape=newshape), queue)
def test_stack(ctx_factory, input_dims):
    """Compile pt.stack for each axis and compare the output to np.stack."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    shape = (2, 2, 2)[:input_dims]

    from numpy.random import default_rng
    rng = default_rng()
    x_in = rng.random(size=shape)
    y_in = rng.random(size=shape)

    namespace = pt.Namespace()
    x = pt.make_data_wrapper(namespace, x_in)
    y = pt.make_data_wrapper(namespace, y_in)

    # Stacking adds one axis, so there are input_dims + 1 valid positions.
    for axis in range(input_dims + 1):
        prog = pt.generate_loopy(
            pt.stack((x, y), axis=axis),
            target=pt.PyOpenCLTarget(queue))
        _, (out,) = prog()
        assert (out == np.stack((x_in, y_in), axis=axis)).all()
def test_array_array_binary_arith(ctx_factory, which, reverse):
    """Array-vs-array binary ops across dtype pairs match numpy."""
    if which == "sub":
        pytest.skip("https://github.com/inducer/loopy/issues/131")

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    op = getattr(operator, which)
    if reverse:
        op = reverse_args(op)

    x_orig = np.array([1, 2, 3, 4, 5])
    y_orig = np.array([10, 9, 8, 7, 6])

    for first_dtype in ARITH_DTYPES:
        namespace = pt.Namespace()
        x_in = x_orig.astype(first_dtype)
        x = pt.make_data_wrapper(namespace, x_in, name="x")

        exprs = {
            dtype: op(x, pt.make_data_wrapper(
                namespace, y_orig.astype(dtype), name=f"y{dtype}"))
            for dtype in ARITH_DTYPES}

        prog = pt.generate_loopy(
            pt.make_dict_of_named_arrays(exprs),
            target=pt.PyOpenCLTarget(queue),
            options=lp.Options(return_dict=True))
        _, outputs = prog()

        for dtype in ARITH_DTYPES:
            out = outputs[dtype]
            out_ref = op(x_in, y_orig.astype(dtype))

            assert out.dtype == out_ref.dtype, (out.dtype, out_ref.dtype)
            # In some cases ops are done in float32 in loopy but float64 in numpy.
            assert np.allclose(out, out_ref), (out, out_ref)
def test_transpose(ctx_factory):
    """The .T property agrees with numpy transposition."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    from numpy.random import default_rng
    x_in = default_rng().random(size=(2, 8))

    namespace = pt.Namespace()
    x = pt.make_data_wrapper(namespace, x_in)

    assert_allclose_to_numpy(x.T, queue)
def main():
    """Ring-exchange demo: each rank sends to rank-1 and receives from rank+1."""
    rank = comm.Get_rank()
    size = comm.Get_size()

    rng = np.random.default_rng()
    x_in = rng.integers(100, size=(4, 4))
    x = pt.make_data_wrapper(x_in)

    # Any hashable object works as a communication tag; it is numbered below.
    mytag = (main, "x")
    halo = staple_distributed_send(
        x, dest_rank=(rank - 1) % size, comm_tag=mytag,
        stapled_to=make_distributed_recv(
            src_rank=(rank + 1) % size, comm_tag=mytag,
            shape=(4, 4), dtype=int))

    y = x + halo

    # Find the partition
    outputs = pt.DictOfNamedArrays({"out": y})
    distributed_parts = find_distributed_partition(outputs)
    distributed_parts, _ = number_distributed_tags(
        comm, distributed_parts, base_tag=42)
    prg_per_partition = generate_code_for_partition(distributed_parts)

    if 0:
        from pytato.visualization import show_dot_graph
        show_dot_graph(distributed_parts)

    # Sanity check
    from pytato.visualization import get_dot_graph_from_partition
    get_dot_graph_from_partition(distributed_parts)

    # Execute the distributed partition
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    context = execute_distributed_partition(
        distributed_parts, prg_per_partition, queue, comm)

    final_res = context["out"].get(queue)

    # Compare against rank 0's result broadcast to everyone.
    ref_res = comm.bcast(final_res)
    np.testing.assert_allclose(ref_res, final_res)

    if rank == 0:
        print("Distributed test succeeded.")
def test_axis_permutation(ctx_factory, axes):
    """pt.transpose with explicit axes agrees with numpy."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    # Truncate the base shape to as many dims as the permutation has.
    shape = (3, 4, 5)[:len(axes)]

    from numpy.random import default_rng
    x_in = default_rng().random(size=shape)

    namespace = pt.Namespace()
    x = pt.make_data_wrapper(namespace, x_in)

    assert_allclose_to_numpy(pt.transpose(x, axes), queue)
def test_transpose(ctx_factory):
    """Generated code for x.T reproduces numpy's transpose exactly."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    from numpy.random import default_rng
    x_in = default_rng().random(size=(2, 8))

    namespace = pt.Namespace()
    x = pt.make_data_wrapper(namespace, x_in)

    prog = pt.generate_loopy(x.T, target=pt.PyOpenCLTarget(queue))
    _, (x_out,) = prog()

    assert (x_out == x_in.T).all()
def test_axis_permutation(ctx_factory, axes):
    """Generated code for pt.transpose matches np.transpose exactly."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    # Truncate the base shape to as many dims as the permutation has.
    shape = (3, 4, 5)[:len(axes)]

    from numpy.random import default_rng
    x_in = default_rng().random(size=shape)

    namespace = pt.Namespace()
    x = pt.make_data_wrapper(namespace, x_in)

    prog = pt.generate_loopy(
        pt.transpose(x, axes), target=pt.PyOpenCLTarget(queue))
    _, (x_out,) = prog()

    assert (x_out == np.transpose(x_in, axes)).all()
def _do_test_distributed_execution_basic(ctx_factory):
    """Smoke-test distributed partition execution on a ring exchange."""
    from mpi4py import MPI  # pylint: disable=import-error
    comm = MPI.COMM_WORLD

    rank = comm.Get_rank()
    size = comm.Get_size()

    rng = np.random.default_rng(seed=27)
    x_in = rng.integers(100, size=(4, 4))
    x = pt.make_data_wrapper(x_in)

    halo = staple_distributed_send(
        x, dest_rank=(rank - 1) % size, comm_tag=42,
        stapled_to=make_distributed_recv(
            src_rank=(rank + 1) % size, comm_tag=42,
            shape=(4, 4), dtype=int))

    y = x + halo

    # Find the partition
    outputs = pt.DictOfNamedArrays({"out": y})
    distributed_parts = find_distributed_partition(outputs)
    prg_per_partition = generate_code_for_partition(distributed_parts)

    # Execute the distributed partition
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    context = execute_distributed_partition(
        distributed_parts, prg_per_partition, queue, comm)

    final_res = context["out"].get(queue)

    # All ranks generate the same random numbers (same seed).
    np.testing.assert_allclose(x_in * 2, final_res)
def main():
    """Partition a small expression DAG, run the pieces, compare to unpartitioned."""
    x_in = np.random.randn(2, 2)
    x = pt.make_data_wrapper(x_in)
    y = pt.stack([x @ x.T, 2 * x, 42 + x])
    y = y + 55

    tm = TopoSortMapper()
    tm(y)

    from functools import partial
    pfunc = partial(get_partition_id, tm.topological_order)

    # Find the partitions
    outputs = pt.DictOfNamedArrays({"out": y})
    partition = find_partition(outputs, pfunc)

    # Show the partitions
    from pytato.visualization import get_dot_graph_from_partition
    get_dot_graph_from_partition(partition)

    # Execute the partitions
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    prg_per_partition = generate_code_for_partition(partition)
    context = execute_partition(partition, prg_per_partition, queue)

    final_res = [context[k] for k in outputs.keys()]

    # Execute the unpartitioned code for comparison
    prg = pt.generate_loopy(y)
    _, (out,) = prg(queue)

    np.testing.assert_allclose([out], final_res)

    print("Partitioning test succeeded.")
def normals(self):
    """Wrap ``self.discr.normals`` as an (nelements, 2)-shaped data wrapper."""
    return pt.make_data_wrapper(
        self.discr.normals, shape=(self.nelements, 2))
def interp_mat(self):
    """Wrap ``self.discr.interp`` as a (2, nnodes)-shaped data wrapper."""
    return pt.make_data_wrapper(
        self.discr.interp, shape=(2, self.nnodes))
def inv_mass_mat(self):
    """Wrap ``self.discr.inv_mass`` as an (nnodes, nnodes)-shaped data wrapper."""
    return pt.make_data_wrapper(
        self.discr.inv_mass, shape=(self.nnodes, self.nnodes))
def stiffness_mat(self):
    """Wrap ``self.discr.stiffness`` as an (nnodes, nnodes)-shaped data wrapper."""
    return pt.make_data_wrapper(
        self.discr.stiffness, shape=(self.nnodes, self.nnodes))
def face_mass_mat(self):
    """Wrap ``self.discr.face_mass`` as an (nnodes, 2)-shaped data wrapper."""
    return pt.make_data_wrapper(
        self.discr.face_mass, shape=(self.nnodes, 2))
def from_numpy(self, np_array: np.ndarray):
    """Copy *np_array* to the device and wrap it as a pytato DataWrapper."""
    import pytato as pt
    import pyopencl.array as cla

    device_array = cla.to_device(self.queue, np_array)
    return pt.make_data_wrapper(self.ns, device_array)
import numpy as np
import pytato as pt
import pyopencl as cl

# Demo: compare pytato reductions on x.T @ x against the numpy equivalents
# for every (axis, reduction) combination.

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

for axis in (None, 1, 0):
    for redn in ("sum", "amax", "amin", "prod"):
        x_in = np.random.randn(20, 10)
        x = pt.make_data_wrapper(x_in)
        x2 = x.T @ x

        np_func = getattr(np, redn)
        pt_func = getattr(pt, redn)

        prg = pt.generate_loopy(pt_func(x2, axis=axis), cl_device=queue.device)
        evt, (out,) = prg(queue)
        evt.wait()

        print("redn =", redn, ", axis =", axis,
              ", max error =",
              np.amax(abs(out - np_func(x_in.T @ x_in, axis))))
def getter(self):
    """Fetch discretization attribute *name* and wrap it as a named array.

    NOTE(review): *name* and *shape* are captured from the enclosing
    scope (closure), not attributes of *self*.
    """
    data = getattr(self.discr, name)
    return pt.make_data_wrapper(
        self.namespace, data, name=name, shape=shape)
import numpy as np
import pytato as pt
import pyopencl as cl

# Demo: an elementwise-product einsum via pytato, checked against numpy.

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

spec = "ij,ij->ij"
n = 4

a_in = np.random.rand(n, n)
b_in = np.random.rand(n, n)

a = pt.make_data_wrapper(a_in)
b = pt.make_data_wrapper(b_in)

prg = pt.generate_loopy(pt.einsum(spec, a, b), cl_device=queue.device)
evt, (out,) = prg(queue)

ans = np.einsum(spec, a_in, b_in)
assert np.linalg.norm(out - ans) <= 1e-15