def test_custom_type_fill(ctx_factory):
    """Check that ``fill`` works on a device array of a registered struct dtype.

    The test is skipped when ``has_struct_arg_count_bug`` reports the device
    as affected.
    """
    from pyopencl.characterize import has_struct_arg_count_bug

    queue = cl.CommandQueue(ctx_factory())
    if has_struct_arg_count_bug(queue.device):
        pytest.skip("device has LLVM arg counting bug")

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    type_name = "mmc_type"
    host_dtype = np.dtype(
        [(field, np.int32) for field in ("cur_min", "cur_max", "pad")])

    # Align the numpy layout with the device's C struct layout, then register
    # the result under the C type name.
    matched_dtype, _ = match_dtype_to_c_struct(queue.device, type_name, host_dtype)
    struct_dtype = get_or_register_dtype(type_name, matched_dtype)

    count = 1000
    dev_array = cl.array.empty(queue, count, dtype=struct_dtype)
    dev_array.fill(np.zeros((), struct_dtype))

    result = dev_array.get()
    assert np.array_equal(np.zeros(count, struct_dtype), result)
def test_custom_type_take_put(ctx_factory):
    """Check indexing a struct-dtype device array with a device index array.

    A host array of ``n`` two-field records is uploaded, every third element
    is gathered on the device via ``z_dev[ind]``, and the result is compared
    against the same gather done with numpy on the host.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtype = np.dtype([
        ("cur_min", np.int32),
        ("cur_max", np.int32),
    ])

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "tp_type"
    dtype, c_decl = match_dtype_to_c_struct(queue.device, name, dtype)
    dtype = get_or_register_dtype(name, dtype)

    n = 100
    # Size the host array from ``n`` so it cannot drift from the ``arange``
    # calls below (it used to be a separately hard-coded 100).
    z = np.empty(n, dtype)
    z["cur_min"] = np.arange(n)
    z["cur_max"] = np.arange(n)**2

    z_dev = cl.array.to_device(queue, z)
    ind = cl.array.arange(queue, n, step=3, dtype=np.int32)

    z_ind_ref = z[ind.get()]
    z_ind = z_dev[ind]

    assert np.array_equal(z_ind.get(), z_ind_ref)
def test_custom_type_zeros(ctx_factory):
    """Check that ``cl.array.zeros`` works with a registered struct dtype.

    Skipped unless both the queue's CL version and the CL header version are
    at least 1.2.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    queue = cl.CommandQueue(ctx_factory())

    have_cl_1_2 = (
        queue._get_cl_version() >= (1, 2)
        and cl.get_cl_header_version() >= (1, 2))
    if not have_cl_1_2:
        pytest.skip("CL1.2 not available")

    type_name = "mmc_type"
    field_names = ("cur_min", "cur_max", "pad")
    host_dtype = np.dtype([(field, np.int32) for field in field_names])

    # Match the numpy layout to the device's struct layout and register it.
    matched_dtype, _ = match_dtype_to_c_struct(queue.device, type_name, host_dtype)
    struct_dtype = get_or_register_dtype(type_name, matched_dtype)

    count = 1000
    result = cl.array.zeros(queue, count, dtype=struct_dtype).get()

    assert np.array_equal(np.zeros(count, struct_dtype), result)
def test_custom_type_zeros(ctx_factory):
    """Verify zero-initialised device arrays of a custom struct dtype.

    The test needs CL 1.2 from both the runtime (as reported by the queue)
    and the headers; otherwise it is skipped.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # Equivalent to ``not (runtime >= 1.2 and headers >= 1.2)``.
    if (queue._get_cl_version() < (1, 2)
            or cl.get_cl_header_version() < (1, 2)):
        pytest.skip("CL1.2 not available")

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    c_name = "mmc_type"
    record_dtype = np.dtype([
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ])
    record_dtype, _unused_decl = match_dtype_to_c_struct(
        queue.device, c_name, record_dtype)
    record_dtype = get_or_register_dtype(c_name, record_dtype)

    length = 1000
    zeros_on_device = cl.array.zeros(queue, length, dtype=record_dtype)
    zeros_on_host = zeros_on_device.get()

    expected = np.zeros(length, record_dtype)
    assert np.array_equal(expected, zeros_on_host)
def create_struct_type(device, struct_name, struct):
    """Build and register a numpy dtype that mirrors a parsed C struct.

    Args:
        device: Device handed to ``match_dtype_to_c_struct`` for layout
            matching.
        struct_name (str): C name under which the dtype is registered.
        struct: Parsed struct node exposing ``decls``; each declaration has a
            ``name`` and a ``type`` that is either a ``TypeDecl`` (scalar) or
            an ``ArrayDecl`` (fixed-size array). Presumably a pycparser AST
            node -- confirm against the caller.

    Returns:
        The dtype returned by ``get_or_register_dtype`` for ``struct_name``.

    Raises:
        NotImplementedError: If a field is neither a scalar nor an array, or
            if an array dimension is not a constant or a ``+`` of two
            constants.
    """

    def create_array_type(name, decl):
        # Join multi-word C type names with a space (e.g. "unsigned int"),
        # exactly as the scalar branch below does; joining without a
        # separator produced names such as "unsignedint".
        # NOTE(review): the bool -> char substitution used for scalars is not
        # applied to array element types -- confirm whether it should be.
        dtype = get_or_register_dtype(' '.join(decl.type.type.type.names))

        dim = decl.type.dim
        if isinstance(dim, Constant):
            dims = int(dim.value)
        elif isinstance(dim, BinaryOp) and dim.op == '+':
            dims = int(dim.left.value) + int(dim.right.value)
        else:
            raise NotImplementedError(
                f'array field `{name}` of struct `{struct_name}` has a '
                'dimension that can not be understood')
        return name, dtype, dims

    struct_fields = []

    # iterate over struct fields
    for field_decl in struct.decls:
        field_name = field_decl.name

        # field is a scalar
        if isinstance(field_decl.type, TypeDecl):
            type_name = ' '.join(field_decl.type.type.names)
            # `bool` fields are stored as `char` on the numpy side.
            field_type = type_name if type_name != 'bool' else 'char'
            struct_fields.append((field_name, get_or_register_dtype(field_type)))
        # field is an array with defined size
        elif isinstance(field_decl.type, ArrayDecl):
            struct_fields.append(create_array_type(field_name, field_decl))
        else:
            raise NotImplementedError(
                f'field `{field_name}` of struct `{struct_name}` has a type '
                'that can not be understood')

    # register struct
    struct_dtype = np.dtype(struct_fields)
    struct_dtype, _ = match_dtype_to_c_struct(device, struct_name, struct_dtype)
    struct_dtype = get_or_register_dtype(struct_name, struct_dtype)
    return struct_dtype
def argmin_kernal(context):
    """Build a reduction kernel that tracks the minimum value and its index.

    Each input element ``x[i]`` is mapped to an ``argmin_collector`` record
    ``(cur_min, cur_index)``; two records are combined by keeping the one
    with the smaller ``cur_min``.

    Args:
        context: OpenCL context the kernel is built for. Its first device is
            used to match the struct layout.

    Returns:
        The ``pyopencl.reduction.ReductionKernel`` instance.
    """
    import numpy as np
    from pyopencl.reduction import ReductionKernel
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "argmin_collector"
    mmc_dtype = np.dtype([
        ("cur_min", np.float32),
        ("cur_index", np.int32),
        ("pad", np.int32),
    ])

    # Take the device from the context that was passed in instead of relying
    # on a `device` name that is neither a parameter nor a local.
    device = context.devices[0]

    mmc_dtype, mmc_c_decl = match_dtype_to_c_struct(device, name, mmc_dtype)
    mmc_dtype = get_or_register_dtype(name, mmc_dtype)

    preamble = mmc_c_decl + r"""//CL//

    argmin_collector mmc_neutral()
    {
        // FIXME: needs infinity literal in real use, ok here
        argmin_collector result;
        result.cur_min = INFINITY;
        result.cur_index = -1;
        return result;
    }

    argmin_collector mmc_from_scalar(float x,int index)
    {
        argmin_collector result;
        result.cur_min = x;
        result.cur_index = index;
        return result;
    }

    argmin_collector agg_mmc(argmin_collector a, argmin_collector b)
    {
        argmin_collector result = a;
        if (b.cur_min < result.cur_min)
        {
            result.cur_min = b.cur_min;
            result.cur_index = b.cur_index;
        }
        return result;
    }

    """

    # NOTE(review): the input is declared `int *x` while `mmc_from_scalar`
    # takes a `float` -- confirm the intended element type.
    red = ReductionKernel(context, mmc_dtype,
            neutral="mmc_neutral()",
            reduce_expr="agg_mmc(a, b)",
            map_expr="mmc_from_scalar(x[i],i)",
            arguments="__global int *x",
            preamble=preamble)
    return red
def test_struct_with_array_fields(ctx_factory):
    """Check that structs with array-valued fields round-trip to a kernel.

    Four structs are uploaded, a kernel sums every number in each struct,
    and the per-struct sums are compared with the same sums computed on the
    host.
    """
    #
    # typedef struct {
    #     uint x[2];
    #     int y;
    #     uint z[3][4];
    # } my_struct;
    #
    cl_ctx = ctx_factory()
    device = cl_ctx.devices[0]
    queue = cl.CommandQueue(cl_ctx)

    struct_fields = [
        ("x", cltypes.uint, 2),
        ("y", cltypes.int),
        ("z", cltypes.uint, (3, 4)),
    ]
    my_struct, cdecl = cl_tools.match_dtype_to_c_struct(
        device, "my_struct", np.dtype(struct_fields))

    # a random buffer of 4 structs
    my_struct_arr = np.array([
        ([81, 24], -57, [[15, 28, 45, 7], [71, 95, 65, 84], [2, 11, 59, 9]]),
        ([5, 20], 47, [[15, 53, 7, 59], [73, 22, 27, 86], [59, 6, 39, 49]]),
        ([11, 99], -32, [[73, 83, 4, 65], [19, 21, 22, 27], [1, 55, 6, 64]]),
        ([57, 38], -54, [[74, 90, 38, 67], [77, 30, 99, 18], [91, 3, 63, 67]])
    ], dtype=my_struct)

    # Host-side reference: one sum per struct over all of its fields.
    expected_res = np.array(
        [int(np.sum(rec[0]) + rec[1] + np.sum(rec[2]))
         for rec in my_struct_arr],
        dtype=cltypes.int)

    kernel_src = """%s
    // this kernel sums every number contained in each struct
    __kernel void array_structs(__global my_struct *structs, __global int *res) {
        int i = get_global_id(0);
        my_struct s = structs[i];
        res[i] = s.x[0] + s.x[1] + s.y;
        for (int r = 0; r < 3; r++)
            for (int c = 0; c < 4; c++)
                res[i] += s.z[r][c];
    }""" % cdecl

    my_struct_buf = cl.Buffer(
        cl_ctx,
        mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR,
        hostbuf=my_struct_arr)
    res_buf = cl.Buffer(cl_ctx, mem_flags.WRITE_ONLY, size=expected_res.nbytes)

    kernel = cl.Program(cl_ctx, kernel_src).build().array_structs
    # One work item per struct.
    kernel(queue, (4, ), None, my_struct_buf, res_buf)

    res = np.empty_like(expected_res)
    cl.enqueue_copy(queue, res, res_buf)

    assert (res == expected_res).all()
def make_mmc_dtype(device):
    """Create and register the ``minmax_collector`` struct dtype.

    Returns a ``(dtype, c_decl)`` pair: the registered dtype and the C
    declaration produced by ``match_dtype_to_c_struct`` for *device*.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    type_name = "minmax_collector"
    unmatched = np.dtype(
        [(field, np.int32) for field in ("cur_min", "cur_max", "pad")])

    matched, c_decl = match_dtype_to_c_struct(device, type_name, unmatched)
    return get_or_register_dtype(type_name, matched), c_decl
def make_mmc_dtype(device):
    """Build the three-``int32`` ``minmax_collector`` dtype for *device*.

    The dtype is matched to the device's C struct layout and registered
    under its C name. Returns ``(registered_dtype, c_declaration)``.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    c_name = "minmax_collector"
    member_names = ["cur_min", "cur_max", "pad"]
    layout = np.dtype(list(zip(member_names, [np.int32] * len(member_names))))

    layout, declaration = match_dtype_to_c_struct(device, c_name, layout)
    registered = get_or_register_dtype(c_name, layout)
    return registered, declaration
def _make_sort_scan_type(device, bits, index_dtype):
    """Create the struct dtype holding one counter per ``bits``-wide bin.

    The struct has ``2**bits`` fields, each of type *index_dtype* and named
    ``c`` followed by ``_padded_bin(bin_number, bits)``.

    Returns ``(name, dtype, c_decl)``.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "pyopencl_sort_scan_%s_%dbits_t" % (index_dtype.type.__name__, bits)

    counters = np.dtype([
        ("c%s" % _padded_bin(bin_nr, bits), index_dtype)
        for bin_nr in range(2**bits)
    ])

    matched, c_decl = match_dtype_to_c_struct(device, name, counters)
    return name, get_or_register_dtype(name, matched), c_decl
def _make_sort_scan_type(device, bits, index_dtype):
    """Build and register the per-bin counter struct used by the sort scan.

    One *index_dtype* field is created for each of the ``2 ** bits`` bins;
    field names are ``"c"`` plus the output of ``_padded_bin``.

    Returns the C type name, the registered dtype and the C declaration.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    type_name = "pyopencl_sort_scan_%s_%dbits_t" % (
        index_dtype.type.__name__, bits)

    bin_labels = ["c%s" % _padded_bin(mnr, bits) for mnr in range(2 ** bits)]
    struct_dtype = np.dtype([(label, index_dtype) for label in bin_labels])

    struct_dtype, declaration = match_dtype_to_c_struct(
        device, type_name, struct_dtype)
    struct_dtype = get_or_register_dtype(type_name, struct_dtype)

    return type_name, struct_dtype, declaration
def make_bounding_box_dtype(device, dimensions, coord_dtype):
    """Create and register a bounding-box struct dtype.

    For each of the first *dimensions* entries of ``AXIS_NAMES`` the struct
    gets a ``min_<axis>`` field followed by a ``max_<axis>`` field, both of
    type *coord_dtype*.

    Returns ``(dtype, c_decl)``.
    """
    from boxtree.tools import AXIS_NAMES
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    # min comes before max for every axis.
    bbox_fields = [
        (template % AXIS_NAMES[axis], coord_dtype)
        for axis in range(dimensions)
        for template in ("min_%s", "max_%s")
    ]
    bbox_dtype = np.dtype(bbox_fields)

    type_name = "boxtree_bbox_%dd_%s_t" % (
        dimensions, get_type_moniker(coord_dtype))

    bbox_dtype, c_decl = match_dtype_to_c_struct(device, type_name, bbox_dtype)
    return get_or_register_dtype(type_name, bbox_dtype), c_decl
def make_collector_dtype(device, dtype, props, name, only_min, only_max):
    """Create and register a min/max collector struct dtype.

    The struct starts with an ``int32`` ``pad`` field. For every entry of
    *props* it then gets ``cur_max_<prop>`` (unless *only_min* is set) and
    ``cur_min_<prop>`` (unless *only_max* is set), both of type *dtype*.

    Returns ``(custom_dtype, c_decl)``.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    # Decide once which per-property fields are wanted; max precedes min.
    templates = []
    if not only_min:
        templates.append("cur_max_%s")
    if not only_max:
        templates.append("cur_min_%s")

    members = [("pad", np.int32)]
    members.extend(
        (template % prop, dtype)
        for prop in props
        for template in templates)

    matched, c_decl = match_dtype_to_c_struct(device, name, np.dtype(members))
    return get_or_register_dtype(name, matched), c_decl
def make_cl_dtype(cl_state, name, dtype):
    """
    Generate an OpenCL structure typedef codelet from a numpy structured
    array dtype.

    Args:
        cl_state (obj): object whose ``device`` attribute is used for
            layout matching
        name (str): C type name used for matching and registration
        dtype (numpy.dtype): structured dtype to process

    Returns:
        numpy.dtype, pyopencl.dtype, str: processed dtype, cl dtype, CL
        typedef codelet
    """
    matched = cltools.match_dtype_to_c_struct(cl_state.device, name, dtype)
    processed_dtype, c_decl = matched
    cl_dtype = cltools.get_or_register_dtype(name, processed_dtype)
    return processed_dtype, cl_dtype, c_decl