def create_struct_type(device, struct_name, struct):
    """Build and register a numpy dtype that mirrors a parsed C struct.

    Args:
        device: device handed to ``match_dtype_to_c_struct``.
        struct_name: name under which the resulting dtype is registered.
        struct: parsed struct node; its ``decls`` are iterated as the fields.

    Returns:
        The dtype returned by ``get_or_register_dtype`` for ``struct_name``.

    Raises:
        NotImplementedError: if a field is neither a scalar (``TypeDecl``)
            nor an array (``ArrayDecl``), or an array dimension is not a
            constant or a ``a + b`` expression.
    """

    def create_array_type(name, decl):
        # Multi-word C types (e.g. "unsigned int") must be joined with a
        # space, exactly as the scalar branch below does; joining with ''
        # would look up the non-existent name "unsignedint".
        dtype = get_or_register_dtype(' '.join(decl.type.type.type.names))
        dim = decl.type.dim
        if isinstance(dim, Constant):
            dims = int(dim.value)
        elif isinstance(dim, BinaryOp) and dim.op == '+':
            dims = int(dim.left.value) + int(dim.right.value)
        else:
            raise NotImplementedError(
                f'array field `{name}` of struct `{struct_name}` has a '
                f'dimension that can not be understood')
        return name, dtype, dims

    struct_fields = []
    for field_decl in struct.decls:
        field_name = field_decl.name
        if isinstance(field_decl.type, TypeDecl):
            # Scalar field; `bool` is stored as `char`.
            type_name = ' '.join(field_decl.type.type.names)
            field_type = type_name if type_name != 'bool' else 'char'
            struct_fields.append(
                (field_name, get_or_register_dtype(field_type)))
        elif isinstance(field_decl.type, ArrayDecl):
            # Array field with a defined size.
            struct_fields.append(create_array_type(field_name, field_decl))
        else:
            raise NotImplementedError(
                f'field `{field_name}` of struct `{struct_name}` has a type that can not be understood')

    # Let the device decide the final layout, then register it by name.
    struct_dtype = np.dtype(struct_fields)
    struct_dtype, _ = match_dtype_to_c_struct(device, struct_name, struct_dtype)
    struct_dtype = get_or_register_dtype(struct_name, struct_dtype)
    return struct_dtype
def __init__(self, image_width, image_height, kernels, templates,
             workgroup_shape, cache_dir="./cl_cache", sorted_output=False):
    """Set up the OpenCL context, queue, buffers and programs.

    Args:
        image_width: image width, used for the array size and work shapes.
        image_height: image height, used for the array size and work shapes.
        kernels: passed to ``get_max_width``, ``init_buffers``,
            ``init_programs`` and (optionally) ``init_indices_buffers``.
        templates: passed to ``init_programs``.
        workgroup_shape: two-element work-group shape.
        cache_dir: stored on the instance as ``cache_dir``.
        sorted_output: when true, index buffers are initialised as well.

    Raises:
        RuntimeError: if ``create_context`` yields no context.
    """
    self.image_height, self.image_width = image_height, image_width
    self.half_max_kernel_width = self.get_max_width(kernels)
    self.array_size = image_width * image_height
    self.workgroup_shape = workgroup_shape
    self.sorted_output = sorted_output
    self.global_work_shape_v = (RoundUp(workgroup_shape[0], image_height),
                                RoundUp(workgroup_shape[1], image_width))
    self.global_work_shape_h = (RoundUp(workgroup_shape[1], image_width),
                                RoundUp(workgroup_shape[0], image_height))
    self.cache_dir = cache_dir

    # All buffers and programs start unset.
    self.kernel_host_buffers = None
    self.kernel_gpu_buffers = None
    self.source_host_buffer = None
    self.source_gpu_buffer = None
    self.filtered_host_buffer = None
    self.filtered_gpu_buffer = None
    self.filtered_host_back_buffers = None
    self.indices_host_back_buffers = None
    self.indices_host_buffer = None
    self.indices_gpu_buffer = None
    self.sorted_indices_gpu_buffer = None
    self.temporal_host_buffers = None
    self.temporal_gpu_buffers = None
    self.kernel_sources = None
    self.kernel_programs = None

    self.context, self.device = self.create_context()
    if self.context is None:
        # __init__ may not return a value (returning 1 raises TypeError),
        # so report the failure with an explicit exception instead.
        raise RuntimeError("Failed to create OpenCL context.")

    self.queue = cl.CommandQueue(self.context, self.device)
    self.init_buffers(kernels)
    self.init_programs(image_width, image_height, kernels, templates)
    if sorted_output:
        self.init_indices_buffers(image_width, image_height, kernels)

    # NOTE(review): registered unconditionally; confirm these were not
    # meant to be limited to the sorted_output case.
    cl_tools.get_or_register_dtype("uint", numpy.uint32)
    cl_tools.get_or_register_dtype("unsigned int", numpy.uint32)
def _create_vector_types():
    """Populate ``vec`` with vector dtypes and their ``make_*`` factories.

    For every scalar base type and every vector width a structured dtype
    with fields ``s0..s{n-1}`` is built, registered under its C name, and
    stored on ``vec``.  ``vec.types`` and ``vec.type_to_scalar_and_count``
    map between (scalar dtype, width) and the vector dtype.
    """
    from pyopencl.tools import get_or_register_dtype

    component_titles = ["x", "y", "z", "w"]
    vector_widths = [2, 3, 4, 8, 16]
    scalar_bases = [
        ('char', np.int8),
        ('uchar', np.uint8),
        ('short', np.int16),
        ('ushort', np.uint16),
        ('int', np.int32),
        ('uint', np.uint32),
        ('long', np.int64),
        ('ulong', np.uint64),
        ('float', np.float32),
        ('double', np.float64),
    ]

    vec.types = {}
    vec.type_to_scalar_and_count = {}

    for base_name, base_type in scalar_bases:
        for count in vector_widths:
            name = "%s%d" % (base_name, count)

            # Only the first four components get a letter title.
            lettered = component_titles[:count]
            titles = lettered + [None] * (count - len(lettered))

            dtype = np.dtype({
                "names": ["s%d" % i for i in range(count)],
                "formats": [base_type] * count,
                "titles": titles,
            })
            get_or_register_dtype(name, dtype)
            setattr(vec, name, dtype)

            # The factory source is assembled from the fixed component
            # names only, so eval sees no external input.
            params = ",".join("%s=0" % fn for fn in lettered)
            values = ",".join(lettered)
            factory = eval(
                "lambda %s: array((%s), dtype=my_dtype)" % (params, values),
                dict(array=np.array, my_dtype=dtype))
            setattr(vec, "make_" + name, staticmethod(factory))

            vec.types[np.dtype(base_type), count] = dtype
            vec.type_to_scalar_and_count[dtype] = np.dtype(base_type), count
def _create_vector_types():
    """Create the vector dtypes and attach them to ``vec``.

    Each (base type, width) pair yields a structured dtype registered under
    the name ``<base><width>``, a ``make_<name>`` factory taking up to four
    keyword-defaulted components, and entries in the two lookup tables
    ``vec.types`` and ``vec.type_to_scalar_and_count``.
    """
    from pyopencl.tools import get_or_register_dtype

    def build_factory(vector_dtype, arg_names):
        # Build ``lambda x=0,y=0,...: array((x,y,...), dtype=my_dtype)``.
        defaulted = ",".join("%s=0" % fn for fn in arg_names)
        plain = ",".join(arg_names)
        return eval(
            "lambda %s: array((%s), dtype=my_dtype)" % (defaulted, plain),
            dict(array=np.array, my_dtype=vector_dtype))

    letters = ["x", "y", "z", "w"]
    vec.types = {}
    vec.type_to_scalar_and_count = {}

    base_types = (
        ('char', np.int8), ('uchar', np.uint8),
        ('short', np.int16), ('ushort', np.uint16),
        ('int', np.int32), ('uint', np.uint32),
        ('long', np.int64), ('ulong', np.uint64),
        ('float', np.float32), ('double', np.float64),
    )

    for base_name, base_type in base_types:
        for count in (2, 3, 4, 8, 16):
            name = "%s%d" % (base_name, count)
            used_letters = letters[:count]

            # Pad the titles with None for components beyond "w".
            titles = list(used_letters)
            missing = count - len(titles)
            if missing > 0:
                titles.extend([None] * missing)

            field_ids = ["s%d" % i for i in range(count)]
            dtype = np.dtype(dict(
                names=field_ids,
                formats=[base_type] * count,
                titles=titles))

            get_or_register_dtype(name, dtype)
            setattr(vec, name, dtype)
            setattr(vec, "make_" + name,
                    staticmethod(build_factory(dtype, used_letters)))

            scalar_key = (np.dtype(base_type), count)
            vec.types[scalar_key] = dtype
            vec.type_to_scalar_and_count[dtype] = scalar_key
def test_custom_type_take_put(ctx_factory):
    """Indexing a device array of a custom struct dtype matches numpy."""
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    queue = cl.CommandQueue(ctx_factory())

    name = "tp_type"
    layout = np.dtype([
        ("cur_min", np.int32),
        ("cur_max", np.int32),
    ])
    dtype, _ = match_dtype_to_c_struct(queue.device, name, layout)
    dtype = get_or_register_dtype(name, dtype)

    n = 100
    host = np.empty(n, dtype)
    host["cur_min"] = np.arange(n)
    host["cur_max"] = np.arange(n)**2
    device_copy = cl.array.to_device(queue, host)

    # Every third element, gathered on the host and on the device.
    ind = cl.array.arange(queue, n, step=3, dtype=np.int32)
    expected = host[ind.get()]
    gathered = device_copy[ind]

    assert np.array_equal(gathered.get(), expected)
def test_custom_type_zeros(ctx_factory):
    """``cl.array.zeros`` with a custom struct dtype yields all zeros."""
    queue = cl.CommandQueue(ctx_factory())

    have_cl12 = (queue._get_cl_version() >= (1, 2)
                 and cl.get_cl_header_version() >= (1, 2))
    if not have_cl12:
        pytest.skip("CL1.2 not available")

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "mmc_type"
    layout = np.dtype([
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ])
    dtype, _ = match_dtype_to_c_struct(queue.device, name, layout)
    dtype = get_or_register_dtype(name, dtype)

    n = 1000
    result = cl.array.zeros(queue, n, dtype=dtype).get()

    assert np.array_equal(np.zeros(n, dtype), result)
def test_custom_type_fill(ctx_factory):
    """Filling a custom-struct device array with a zero record zeroes it."""
    queue = cl.CommandQueue(ctx_factory())

    from pyopencl.characterize import has_struct_arg_count_bug
    if has_struct_arg_count_bug(queue.device):
        pytest.skip("device has LLVM arg counting bug")

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "mmc_type"
    layout = np.dtype([
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ])
    dtype, _ = match_dtype_to_c_struct(queue.device, name, layout)
    dtype = get_or_register_dtype(name, dtype)

    n = 1000
    device_array = cl.array.empty(queue, n, dtype=dtype)
    # A zero-dimensional array of the struct dtype serves as the fill value.
    device_array.fill(np.zeros((), dtype))
    result = device_array.get()

    assert np.array_equal(np.zeros(n, dtype), result)
def get_shift_kernel(num_shifts):
    """Build a reduction kernel that accumulates ``num_shifts`` phase-shifted sums.

    The rendered preamble declares a struct ``shift_t<n>`` with one
    ``cfloat_t`` member per shift, plus the map, reduce and neutral-element
    functions used by the returned ``LowerLatencyReductionKernel``.
    """
    template_source = """
    typedef struct {
    % for i in range(n):
        cfloat_t v${i};
    % endfor
    }shift_t${n};

    __global shift_t${n} shift_red(shift_t${n} a, shift_t${n} b){
    % for i in range(n):
        a.v${i} += b.v${i};
    % endfor
        return a;
    }

    __global shift_t${n} shift_start(){
        shift_t${n} t;
    % for i in range(n):
        t.v${i}=0;
    % endfor
        return t;
    }

    __global shift_t${n} shift_map(cfloat_t x,
    % for i in range(n):
        float shift${i},
    % endfor
        float offset, float slen){
        shift_t${n} t;

        float pphase = offset * 2 * 3.141592653 / slen;
        float phase, pr, pi;

    % for i in range(n):
        phase = pphase * shift${i};
        pi = sincos(phase, &pr);

        // Phase shift the input data (x) to correspond to a time shift
        t.v${i}.x = x.x * pr - x.y * pi;
        t.v${i}.y = x.x * pi + x.y * pr;
    % endfor
        return t;
    }
    """
    shift_preamble = Template(template_source).render(n = num_shifts)

    # One " shiftN," per shift for the map call, and one " float shiftN, "
    # per shift for the kernel argument list.
    shift_indices = range(num_shifts)
    map_call_args = "".join(" shift%s," % i for i in shift_indices)
    kernel_decl_args = "".join(" float shift%s, " % i for i in shift_indices)

    # A single sub-array field of num_shifts complex64 values is registered
    # as the host-side dtype for shift_t<n>.
    record_dtype = numpy.dtype([("v1", numpy.complex64, num_shifts)])
    shift_t = get_or_register_dtype('shift_t%s' % num_shifts, record_dtype)

    return LowerLatencyReductionKernel(
        pycbc.scheme.mgr.state.context,
        shift_t,
        neutral="shift_start()",
        reduce_expr="shift_red(a, b)",
        map_expr="shift_map(x[i], " + map_call_args + " offset+i, slen)",
        arguments="__global cfloat_t *x," + kernel_decl_args
                  + "float offset, float slen ",
        preamble=shift_preamble)
def test_custom_type_zeros(ctx_factory):
    """A zero-initialised device array of a custom struct dtype reads back as zeros."""
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    # Short-circuits exactly like the original: the header version is only
    # queried when the runtime version is at least 1.2.
    if not queue._get_cl_version() >= (1, 2) \
            or not cl.get_cl_header_version() >= (1, 2):
        pytest.skip("CL1.2 not available")

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    struct_name = "mmc_type"
    fields = [
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ]
    matched, _decl = match_dtype_to_c_struct(
        queue.device, struct_name, np.dtype(fields))
    dtype = get_or_register_dtype(struct_name, matched)

    n = 1000
    expected = np.zeros(n, dtype)
    z_dev = cl.array.zeros(queue, n, dtype=dtype)

    assert np.array_equal(expected, z_dev.get())
def argmin_kernal(context):
    """Build a reduction kernel that finds the minimum value and its index.

    Each element ``x[i]`` is mapped to an ``argmin_collector`` record holding
    the value and ``i``; records are combined by keeping the one with the
    smaller ``cur_min``.

    Args:
        context: OpenCL context the kernel is built for.  Its first device
            is used to match the struct layout.

    Returns:
        A ``pyopencl.reduction.ReductionKernel`` whose result dtype is the
        registered ``argmin_collector`` struct.
    """
    import numpy as np
    from pyopencl.reduction import ReductionKernel
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "argmin_collector"
    mmc_dtype = np.dtype([
        ("cur_min", np.float32),
        ("cur_index", np.int32),
        ("pad", np.int32),
    ])

    # ``device`` was never defined in this function; take it from the
    # context that was passed in instead of relying on a stray global.
    device = context.devices[0]
    mmc_dtype, mmc_c_decl = match_dtype_to_c_struct(device, name, mmc_dtype)
    mmc_dtype = get_or_register_dtype(name, mmc_dtype)

    preamble = mmc_c_decl + r"""//CL//

    argmin_collector mmc_neutral()
    {
        // FIXME: needs infinity literal in real use, ok here
        argmin_collector result;
        result.cur_min = INFINITY;
        result.cur_index = -1;
        return result;
    }

    argmin_collector mmc_from_scalar(float x,int index)
    {
        argmin_collector result;
        result.cur_min = x;
        result.cur_index = index;
        return result;
    }

    argmin_collector agg_mmc(argmin_collector a, argmin_collector b)
    {
        argmin_collector result = a;
        if (b.cur_min < result.cur_min)
        {
            result.cur_min = b.cur_min;
            result.cur_index = b.cur_index;
        }
        return result;
    }
    """

    # NOTE(review): the input is declared as ``int *`` although the collector
    # stores a float minimum -- confirm the intended element type.
    red = ReductionKernel(
        context, mmc_dtype,
        neutral="mmc_neutral()",
        reduce_expr="agg_mmc(a, b)",
        map_expr="mmc_from_scalar(x[i],i)",
        arguments="__global int *x",
        preamble=preamble)
    return red
def create_array_type(name, decl):
    """Translate a parsed C array declaration into a numpy field spec.

    Args:
        name: field name to use in the resulting spec.
        decl: parsed declaration; ``decl.type.dim`` carries the dimension
            and ``decl.type.type.type.names`` the element type tokens.

    Returns:
        A ``(name, dtype, dims)`` tuple.

    Raises:
        NotImplementedError: if the dimension is neither a constant nor a
            ``a + b`` expression.
    """
    # Multi-word C types (e.g. "unsigned int") are registered with a space
    # between the tokens; joining with '' would produce "unsignedint".
    dtype = get_or_register_dtype(' '.join(decl.type.type.type.names))

    dim = decl.type.dim
    if isinstance(dim, Constant):
        dims = int(dim.value)
    elif isinstance(dim, BinaryOp) and dim.op == '+':
        dims = int(dim.left.value) + int(dim.right.value)
    else:
        raise NotImplementedError(
            'array field `%s` has a dimension that can not be understood'
            % name)
    return name, dtype, dims
def make_stratumMeta_dtype(device):
    """Register and return the ``stratumMeta`` struct dtype.

    The record holds two int32 coordinates and a (1, 2) float32 ``data``
    block.  ``device`` is accepted but not used.
    """
    from pyopencl.tools import get_or_register_dtype

    layout = [
        ("x", np.int32),
        ("y", np.int32),
        ("data", np.float32, (1,2)),
    ]
    return get_or_register_dtype("stratumMeta", np.dtype(layout))
def make_mmc_dtype(device):
    """Create the ``minmax_collector`` struct dtype for ``device``.

    Returns:
        ``(dtype, c_decl)``: the registered dtype and the C declaration
        produced by ``match_dtype_to_c_struct``.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    struct_name = "minmax_collector"
    layout = np.dtype([
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ])
    matched, c_decl = match_dtype_to_c_struct(device, struct_name, layout)
    return get_or_register_dtype(struct_name, matched), c_decl
def make_stratumMeta_dtype(self):
    """Register and return the ``stratumMeta`` struct dtype.

    The ``w`` and ``h`` members are float32 blocks sized from the first two
    dimensions of ``self.w`` and ``self.h``.
    """
    from pyopencl.tools import get_or_register_dtype

    w_shape = (int(self.w.shape[0]), int(self.w.shape[1]))
    h_shape = (int(self.h.shape[0]), int(self.h.shape[1]))

    layout = [('x', np.int32), ('y', np.int32),
              ('rStart', np.int32), ('cStart', np.int32),
              ('w', np.float32, w_shape),
              ('h', np.float32, h_shape)]
    return get_or_register_dtype("stratumMeta", np.dtype(layout))
def _make_sort_scan_type(device, bits, index_dtype):
    """Create the per-bucket counter struct used by the sort scan.

    One ``index_dtype`` field named ``c<binary>`` is generated for each of
    the ``2**bits`` bucket numbers.

    Returns:
        ``(name, dtype, c_decl)``: the struct name, its registered dtype and
        the matching C declaration.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "pyopencl_sort_scan_%s_%dbits_t" % (
        index_dtype.type.__name__, bits)

    layout = np.dtype([
        ('c%s' % _padded_bin(bucket, bits), index_dtype)
        for bucket in range(2**bits)
    ])

    matched, c_decl = match_dtype_to_c_struct(device, name, layout)
    registered = get_or_register_dtype(name, matched)
    return name, registered, c_decl
def make_mmc_dtype(device):
    """Build and register the ``minmax_collector`` record type.

    The layout is matched against ``device`` before registration.

    Returns:
        A ``(dtype, c_decl)`` pair.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "minmax_collector"
    member_names = ("cur_min", "cur_max", "pad")
    unmatched = np.dtype([(member, np.int32) for member in member_names])

    dtype, c_decl = match_dtype_to_c_struct(device, name, unmatched)
    dtype = get_or_register_dtype(name, dtype)
    return dtype, c_decl
def _make_sort_scan_type(device, bits, index_dtype):
    """Build, match and register the sort-scan counter struct.

    The struct has ``2 ** bits`` members of type ``index_dtype``; each is
    named ``c`` followed by the padded binary form of its bucket number.

    Returns:
        Tuple of struct name, registered dtype and C declaration.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    type_label = index_dtype.type.__name__
    name = "pyopencl_sort_scan_%s_%dbits_t" % (type_label, bits)

    bucket_count = 2 ** bits
    member_names = ["c%s" % _padded_bin(mnr, bits)
                    for mnr in range(bucket_count)]
    dtype = np.dtype([(member, index_dtype) for member in member_names])

    dtype, c_decl = match_dtype_to_c_struct(device, name, dtype)
    dtype = get_or_register_dtype(name, dtype)
    return name, dtype, c_decl
def make_bounding_box_dtype(device, dimensions, coord_dtype):
    """Create the bounding-box struct dtype for the given dimensionality.

    For each of the first ``dimensions`` axis names a ``min_<axis>`` and a
    ``max_<axis>`` member of type ``coord_dtype`` is emitted, in that order.

    Returns:
        ``(dtype, c_decl)``: the registered dtype and its C declaration.
    """
    from boxtree.tools import AXIS_NAMES
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    layout = []
    for axis in range(dimensions):
        axis_name = AXIS_NAMES[axis]
        layout.extend([
            ("min_%s" % axis_name, coord_dtype),
            ("max_%s" % axis_name, coord_dtype),
        ])
    unmatched = np.dtype(layout)

    name = "boxtree_bbox_%dd_%s_t" % (
        dimensions, get_type_moniker(coord_dtype))

    matched, c_decl = match_dtype_to_c_struct(device, name, unmatched)
    return get_or_register_dtype(name, matched), c_decl
def make_collector_dtype(device, dtype, props, name, only_min, only_max):
    """Create a min/max collector struct with one slot pair per property.

    The struct starts with an int32 ``pad`` member.  For every entry of
    ``props`` a ``cur_max_<prop>`` member is added unless ``only_min`` is
    set, followed by a ``cur_min_<prop>`` member unless ``only_max`` is set.

    Returns:
        ``(custom_dtype, c_decl)``: the registered dtype and its C
        declaration.
    """
    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    track_max = not only_min
    track_min = not only_max

    layout = [("pad", np.int32)]
    for prop in props:
        if track_max:
            layout.append(("cur_max_%s" % prop, dtype))
        if track_min:
            layout.append(("cur_min_%s" % prop, dtype))

    matched, c_decl = match_dtype_to_c_struct(device, name, np.dtype(layout))
    return get_or_register_dtype(name, matched), c_decl
def make_cl_dtype(cl_state, name, dtype):
    """
    Match a numpy structured dtype to a C struct and register it by name.

    Args:
        cl_state (obj): object whose ``device`` attribute is used for the
            layout matching
        name (str): name of the C struct and of the registered dtype
        dtype (numpy.dtype): structured dtype to match

    Returns:
        numpy.dtype, pyopencl.dtype, str: the matched dtype, the result of
        registering it under ``name``, and the C typedef codelet
    """
    matched = cltools.match_dtype_to_c_struct(cl_state.device, name, dtype)
    processed_dtype, c_decl = matched
    registered = cltools.get_or_register_dtype(name, processed_dtype)
    return processed_dtype, registered, c_decl
from pycbc.types import Array
import pycbc.scheme
from pyopencl import array as clarray
from pyopencl.reduction import ReductionKernel
from pyopencl.tools import dtype_to_ctype, get_or_register_dtype
from pyopencl.elementwise import ElementwiseKernel, complex_dtype_to_name
from pyopencl.tools import context_dependent_memoize
from mako.template import Template


def chisq_accum_bin(chisq, q):
    """Add the squared norm of ``q`` to ``chisq`` with ``+=``.

    Nothing is returned, so the caller only observes the update when
    ``chisq`` supports in-place addition.
    """
    chisq += q.squared_norm()


# Names that complex_dtype_to_name reports for the two complex dtypes.
cfloat = complex_dtype_to_name(numpy.complex64)
cdouble = complex_dtype_to_name(numpy.complex128)

# Register 'cfloat' / 'cdouble' as names for the numpy complex dtypes.
get_or_register_dtype('cfloat', numpy.complex64)
get_or_register_dtype('cdouble', numpy.complex128)


class LowerLatencyReductionKernel(ReductionKernel):
    """``ReductionKernel`` subclass that overrides ``__call__``.

    NOTE(review): presumably a lower-latency launch path, judging by the
    name; the part of ``__call__`` visible here only parses its options.
    """

    def __call__(self, *args, **kwargs):
        # Tuning constants; not referenced in the visible part of the body.
        MAX_GROUP_COUNT = 1024
        SMALL_SEQ_COUNT = 4

        from pyopencl.array import empty

        stage_inf = self.stage_1_inf

        # Optional keyword arguments are removed from kwargs as they are read.
        final_result = kwargs.pop("result", None)
        queue = kwargs.pop("queue", None)
        wait_for = kwargs.pop("wait_for", None)
        return_event = kwargs.pop("return_event", False)
# Precision-specific preambles: each prepends the macro definitions for the
# struct name (MAXLOCN), value type (TTYPE) and index type (LTYPE) to the
# shared ``maxloc_preamble`` source.
maxloc_preamble_single = """
    #define MAXLOCN maxlocs
    #define TTYPE float
    #define LTYPE int
""" + maxloc_preamble

maxloc_preamble_double = """
    #define MAXLOCN maxlocd
    #define TTYPE double
    #define LTYPE long
""" + maxloc_preamble

# Host-side record layouts: a value ("max") and its location ("loc").
maxloc_dtype_double = np.dtype([("max", np.float64), ("loc", np.int64)])
maxloc_dtype_single = np.dtype([("max", np.float32), ("loc", np.int32)])

# Register the layouts under the same names the preambles define as MAXLOCN.
maxloc_dtype_single = get_or_register_dtype("maxlocs", dtype=maxloc_dtype_single)
maxloc_dtype_double = get_or_register_dtype("maxlocd", dtype=maxloc_dtype_double)

# Reduction kernels over float / double input; both map x[i] together with
# its index i and combine the records with maxloc_red.
mls = ReductionKernel(mgr.state.context, maxloc_dtype_single, neutral = "maxloc_start()",
        reduce_expr="maxloc_red(a, b)", map_expr="maxloc_map(x[i], i)",
        arguments="float *x", preamble=maxloc_preamble_single)

mld = ReductionKernel(mgr.state.context, maxloc_dtype_double, neutral = "maxloc_start()",
        reduce_expr="maxloc_red(a, b)", map_expr="maxloc_map(x[i], i)",
        arguments="double *x", preamble=maxloc_preamble_double)

# Lookup of the kernel by precision name.
max_loc_map = {'single':mls,'double':mld}

# Names that complex_dtype_to_name reports for the two complex dtypes.
cfloat = complex_dtype_to_name(np.complex64)
cdouble = complex_dtype_to_name(np.complex128)
def _create_vector_types():
    """Define the vector dtypes and their constructors as module globals.

    For every scalar base type and vector width this registers a structured
    dtype ``<base><width>`` (3-vectors are padded to four components) and
    publishes ``make_``, ``filled_``, ``zeros_`` and ``ones_`` helpers for
    it.  The module-level ``types`` and ``type_to_scalar_and_count`` tables
    are rebuilt from scratch.
    """
    module_ns = globals()

    base_names = ['char', 'uchar', 'short', 'ushort', 'int', 'uint',
                  'long', 'ulong', 'float', 'double']
    _mapping = [(k, module_ns[k]) for k in base_names]

    component_titles = ["x", "y", "z", "w"]

    module_ns['types'] = {}
    module_ns['type_to_scalar_and_count'] = {}

    def create_array(dtype, count, padded_count, *args, **kwargs):
        # Build one vector value, zero-filling components not supplied.
        if len(args) < count:
            from warnings import warn
            warn("default values for make_xxx are deprecated;"
                 " instead specify all parameters or use"
                 " cltypes.zeros_xxx", DeprecationWarning)

        padded_args = tuple(list(args) + [0] * (padded_count - len(args)))
        array = eval("array(padded_args, dtype=dtype)",
                     dict(array=np.array, padded_args=padded_args,
                          dtype=dtype))
        # Keyword arguments assign individual fields after construction.
        for key, val in list(kwargs.items()):
            array[key] = val
        return array

    for base_name, base_type in _mapping:
        for count in [2, 3, 4, 8, 16]:
            name = "%s%d" % (base_name, count)

            # 3-component vectors occupy the storage of 4-component ones.
            padded_count = 4 if count == 3 else count

            names = ["s%d" % i for i in range(count)]
            names += ["padding%d" % i for i in range(padded_count - count)]

            titles = component_titles[:count]
            titles += [None] * (len(names) - len(titles))

            try:
                dtype = np.dtype(dict(names=names,
                                      formats=[base_type] * padded_count,
                                      titles=titles))
            except NotImplementedError:
                # Fall back to list-style specs, first with titles and
                # then without them.
                try:
                    dtype = np.dtype([((n, title), base_type)
                                      for (n, title) in zip(names, titles)])
                except TypeError:
                    dtype = np.dtype([(n, base_type)
                                      for (n, title) in zip(names, titles)])

            get_or_register_dtype(name, dtype)
            module_ns[name] = dtype

            module_ns["make_" + name] = eval(
                "lambda *args, **kwargs: create_array(dtype, %i, %i, "
                "*args, **kwargs)" % (count, padded_count),
                dict(create_array=create_array, dtype=dtype))
            # These lambdas look up make_/filled_ in the module globals
            # when they are called.
            module_ns["filled_" + name] = eval(
                "lambda val: make_%s(*[val]*%i)" % (name, count))
            module_ns["zeros_" + name] = eval(
                "lambda: filled_%s(0)" % (name))
            module_ns["ones_" + name] = eval(
                "lambda: filled_%s(1)" % (name))

            module_ns['types'][np.dtype(base_type), count] = dtype
            module_ns['type_to_scalar_and_count'][dtype] = \
                np.dtype(base_type), count
def _create_vector_types():
    """Publish all vector dtypes and their helper constructors as globals.

    Every combination of scalar base type and width gets a registered
    structured dtype plus ``make_*``, ``filled_*``, ``zeros_*`` and
    ``ones_*`` callables; the global ``types`` and
    ``type_to_scalar_and_count`` dictionaries index the results.
    """
    _mapping = []
    for k in ['char', 'uchar', 'short', 'ushort', 'int',
              'uint', 'long', 'ulong', 'float', 'double']:
        _mapping.append((k, globals()[k]))

    def set_global(key, val):
        globals()[key] = val

    def build_dtype(names, titles, base_type):
        # Preferred dict form first; fall back to list-style specs.
        try:
            return np.dtype(dict(
                names=names,
                formats=[base_type] * len(names),
                titles=titles))
        except NotImplementedError:
            pairs = list(zip(names, titles))
            try:
                return np.dtype([((n, title), base_type)
                                 for (n, title) in pairs])
            except TypeError:
                return np.dtype([(n, base_type) for (n, title) in pairs])

    field_names = ["x", "y", "z", "w"]
    set_global('types', {})
    set_global('type_to_scalar_and_count', {})

    for base_name, base_type in _mapping:
        for count in (2, 3, 4, 8, 16):
            name = "%s%d" % (base_name, count)

            # Width 3 is stored with one padding component.
            padded_count = count
            if count == 3:
                padded_count = 4

            names = ["s%d" % i for i in range(count)]
            for pad_index in range(padded_count - count):
                names.append("padding%d" % pad_index)

            titles = field_names[:count]
            shortfall = len(names) - len(titles)
            if shortfall > 0:
                titles.extend([None] * shortfall)

            dtype = build_dtype(names, titles, base_type)
            get_or_register_dtype(name, dtype)
            set_global(name, dtype)

            def create_array(dtype, count, padded_count, *args, **kwargs):
                if len(args) < count:
                    from warnings import warn
                    warn("default values for make_xxx are deprecated;"
                         " instead specify all parameters or use"
                         " cltypes.zeros_xxx", DeprecationWarning)
                zero_fill = [0] * (padded_count - len(args))
                padded_args = tuple(list(args) + zero_fill)
                array = eval("array(padded_args, dtype=dtype)",
                             dict(array=np.array, padded_args=padded_args,
                                  dtype=dtype))
                for key, val in list(kwargs.items()):
                    array[key] = val
                return array

            make_source = (
                "lambda *args, **kwargs: create_array(dtype, %i, %i, "
                "*args, **kwargs)" % (count, padded_count))
            set_global("make_" + name, eval(
                make_source, dict(create_array=create_array, dtype=dtype)))
            set_global("filled_" + name, eval(
                "lambda val: make_%s(*[val]*%i)" % (name, count)))
            set_global("zeros_" + name, eval("lambda: filled_%s(0)" % (name)))
            set_global("ones_" + name, eval("lambda: filled_%s(1)" % (name)))

            scalar_dtype = np.dtype(base_type)
            globals()['types'][scalar_dtype, count] = dtype
            globals()['type_to_scalar_and_count'][dtype] = scalar_dtype, count