def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "put" kernel with fill/wrap-around sources.

    For each work index ``i`` the generated code reads
    ``dest_idx = gmem_dest_idx[i]`` and then, for every ``k`` in
    ``range(vec_count)``, stores into ``dest<k>[dest_idx]`` either
    ``src<k>[0]`` (when ``use_fill[k]`` is nonzero) or
    ``src<k>[i % val_ary_lengths[k]]``.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype*.
    :arg dtype: element type of the ``dest<k>`` and ``src<k>`` arguments.
    :arg idx_dtype: element type of ``gmem_dest_idx``.
    :arg vec_count: number of ``dest``/``src`` argument pairs.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"put"``.
    """
    # Only "idx_tp" is interpolated into the body text below.
    ctx = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # The concatenation order below fixes the kernel's argument order.
    dest_args = [
        VectorArg(dtype, "dest%d" % k, with_offset=True) for k in vec_ids
    ]
    index_args = [VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True)]
    src_args = [
        VectorArg(dtype, "src%d" % k, with_offset=True) for k in vec_ids
    ]
    fill_flag_args = [VectorArg(np.uint8, "use_fill", with_offset=True)]
    length_args = [VectorArg(np.int64, "val_ary_lengths", with_offset=True)]
    args = dest_args + index_args + src_args + fill_flag_args + length_args

    store_template = (
        "dest{i}[dest_idx] = (use_fill[{i}] ? src{i}[0] : "
        "src{i}[i % val_ary_lengths[{i}]]);"
    )
    stores = [store_template.format(i=k) for k in vec_ids]
    index_load = "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % ctx
    body = index_load + "\n".join(stores)

    preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=preamble, name="put")
def get_fill_kernel(context, dtype):
    """Build the elementwise "fill" kernel, whose operation is ``z[i] = a``.

    The kernel takes a pointer ``z`` and a scalar ``a``, both declared with
    the C type name returned by ``dtype_to_ctype(dtype)``.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype*.
    :arg dtype: element type of ``z`` and type of the scalar ``a``.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"fill"``.
    """
    ctype = dtype_to_ctype(dtype)
    arguments = "{tp} *z, {tp} a".format(tp=ctype)
    operation = "z[i] = a"
    preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, arguments, operation, preamble=preamble, name="fill")
def _get_kernel(self, dtype, src_index_dtype, dst_index_dtype,
                have_src_indices, have_dst_indices, map_values):
    """Build the ``gappy_copy_and_map`` elementwise kernel.

    The kernel always takes ``input_ary`` and ``output_ary`` (both of
    *dtype*). Depending on the flags it additionally takes, in this order:
    ``from_indices`` (*src_index_dtype*) if *have_src_indices*,
    ``to_indices`` (*dst_index_dtype*) if *have_dst_indices*, and
    ``value_map`` (*dtype*) if *map_values*.

    The operation text is rendered from ``GAPPY_COPY_TPL`` with the same
    dtypes and flags; the template itself is defined elsewhere.

    :returns: an ``ElementwiseKernel`` built on ``self.context``.
    """
    from boxtree.tools import VectorArg
    from pyopencl.tools import dtype_to_ctype
    from pyopencl.elementwise import ElementwiseKernel

    # (enabled, element dtype, argument name), in kernel-argument order.
    optional_args = (
        (have_src_indices, src_index_dtype, "from_indices"),
        (have_dst_indices, dst_index_dtype, "to_indices"),
        (map_values, dtype, "value_map"),
    )

    args = [
        VectorArg(dtype, "input_ary"),
        VectorArg(dtype, "output_ary"),
    ]
    for enabled, arg_dtype, arg_name in optional_args:
        if enabled:
            args.append(VectorArg(arg_dtype, arg_name))

    render_kwargs = dict(
        dtype=dtype,
        dtype_to_ctype=dtype_to_ctype,
        from_dtype=src_index_dtype,
        to_dtype=dst_index_dtype,
        from_indices=have_src_indices,
        to_indices=have_dst_indices,
        map_values=map_values,
    )
    src = GAPPY_COPY_TPL.render(**render_kwargs)

    preamble = dtype_to_c_struct(self.context.devices[0], dtype)
    return ElementwiseKernel(
        self.context, args, str(src),
        preamble=preamble,
        name="gappy_copy_and_map")
def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
    """Build the elementwise "take_put" (gather-then-scatter) kernel.

    For each work index ``i`` the generated code reads
    ``src_idx = gmem_src_idx[i]`` and ``dest_idx = gmem_dest_idx[i]`` and
    then, for every ``k`` in ``range(vec_count)``, performs
    ``dest<k>[dest_idx] = src<k>[src_idx]``. When *with_offsets* is true the
    read is ``src<k>[src_idx+offset<k>]`` and one scalar ``offset<k>`` of
    *idx_dtype* is appended to the arguments per vector.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype*.
    :arg dtype: element type of the ``dest<k>`` and ``src<k>`` arguments.
    :arg idx_dtype: element type of both index arrays and of the offsets.
    :arg with_offsets: whether per-vector source offsets are used.
    :arg vec_count: number of ``dest``/``src`` argument pairs.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"take_put"``.
    """
    # Only "idx_tp" is interpolated into the body text below.
    ctx = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Note: the dest arrays are declared without with_offset, unlike the
    # index and src arrays.
    args = [VectorArg(dtype, "dest%d" % k) for k in vec_ids]
    args.append(VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True))
    args.append(VectorArg(idx_dtype, "gmem_src_idx", with_offset=True))
    args.extend(
        VectorArg(dtype, "src%d" % k, with_offset=True) for k in vec_ids)
    if with_offsets:
        args.extend(ScalarArg(idx_dtype, "offset%d" % k) for k in vec_ids)

    if with_offsets:
        copy_insns = [
            "dest%d[dest_idx] = src%d[src_idx+offset%d];" % (k, k, k)
            for k in vec_ids
        ]
    else:
        copy_insns = [
            "dest%d[dest_idx] = src%d[src_idx];" % (k, k)
            for k in vec_ids
        ]

    index_loads = (
        "%(idx_tp)s src_idx = gmem_src_idx[i];\n"
        "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % ctx
    )
    body = index_loads + "\n".join(copy_insns)

    preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=preamble, name="take_put")
def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "put" (scatter) kernel.

    For each work index ``i`` the generated code reads
    ``dest_idx = gmem_dest_idx[i]`` and then, for every ``k`` in
    ``range(vec_count)``, performs ``dest<k>[dest_idx] = src<k>[i]``.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype*.
    :arg dtype: element type of the ``dest<k>`` and ``src<k>`` arguments.
    :arg idx_dtype: element type of ``gmem_dest_idx``.
    :arg vec_count: number of ``dest``/``src`` argument pairs.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"put"``.
    """
    # Only "idx_tp" is interpolated into the body text below.
    ctx = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Argument order: all dest arrays, the index array, then all src arrays.
    dest_args = [
        VectorArg(dtype, "dest%d" % k, with_offset=True) for k in vec_ids
    ]
    index_args = [VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True)]
    src_args = [
        VectorArg(dtype, "src%d" % k, with_offset=True) for k in vec_ids
    ]
    args = dest_args + index_args + src_args

    stores = ["dest%d[dest_idx] = src%d[i];" % (k, k) for k in vec_ids]
    index_load = "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % ctx
    body = index_load + "\n".join(stores)

    preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=preamble, name="put")
def _get_kernel(self, dtype, src_index_dtype, dst_index_dtype,
                have_src_indices, have_dst_indices, map_values):
    """Build the ``gappy_copy_and_map`` elementwise kernel.

    Every vector argument is declared with ``with_offset=True``. The kernel
    always takes ``input_ary`` and ``output_ary`` (both of *dtype*).
    Depending on the flags it additionally takes, in this order:
    ``from_indices`` (*src_index_dtype*) if *have_src_indices*,
    ``to_indices`` (*dst_index_dtype*) if *have_dst_indices*, and
    ``value_map`` (*dtype*) if *map_values*.

    The operation text is rendered from ``GAPPY_COPY_TPL`` with the same
    dtypes and flags; the template itself is defined elsewhere.

    :returns: an ``ElementwiseKernel`` built on ``self.context``.
    """
    from pyopencl.tools import VectorArg
    from pyopencl.tools import dtype_to_ctype
    from pyopencl.elementwise import ElementwiseKernel

    def offset_vector(arg_dtype, arg_name):
        # All array arguments of this kernel share the same declaration form.
        return VectorArg(arg_dtype, arg_name, with_offset=True)

    args = [
        offset_vector(dtype, "input_ary"),
        offset_vector(dtype, "output_ary"),
    ]

    # (enabled, element dtype, argument name), in kernel-argument order.
    optional_args = (
        (have_src_indices, src_index_dtype, "from_indices"),
        (have_dst_indices, dst_index_dtype, "to_indices"),
        (map_values, dtype, "value_map"),
    )
    for enabled, arg_dtype, arg_name in optional_args:
        if enabled:
            args.append(offset_vector(arg_dtype, arg_name))

    src = GAPPY_COPY_TPL.render(
        dtype=dtype,
        dtype_to_ctype=dtype_to_ctype,
        from_dtype=src_index_dtype,
        to_dtype=dst_index_dtype,
        from_indices=have_src_indices,
        to_indices=have_dst_indices,
        map_values=map_values,
    )

    preamble = dtype_to_c_struct(self.context.devices[0], dtype)
    return ElementwiseKernel(
        self.context, args, str(src),
        preamble=preamble,
        name="gappy_copy_and_map")
def get_fill_kernel(context, dtype):
    """Build the elementwise "fill" kernel, whose operation is ``z[i] = a``.

    The kernel takes a pointer ``z`` and a scalar ``a``, both declared with
    the C type name returned by ``dtype_to_ctype(dtype)``.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype*.
    :arg dtype: element type of ``z`` and type of the scalar ``a``.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"fill"``.
    """
    type_names = {"tp": dtype_to_ctype(dtype)}
    arguments = "%(tp)s *z, %(tp)s a" % type_names
    operation = "z[i] = a"
    preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, arguments, operation, preamble=preamble, name="fill")
def get_take_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "take" (gather) kernel.

    For each work index ``i`` the generated code reads ``src_idx = idx[i]``
    and then, for every ``k`` in ``range(vec_count)``, performs
    ``dest<k>[i] = src<k>[src_idx]``.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype*.
    :arg dtype: element type of the ``dest<k>`` and ``src<k>`` arguments.
    :arg idx_dtype: element type of ``idx``.
    :arg vec_count: number of ``dest``/``src`` argument pairs.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"take"``.
    """
    # Only "idx_tp" is interpolated into the body text below.
    ctx = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Argument order: all dest arrays, all src arrays, then the index array.
    args = [
        VectorArg(dtype, "dest" + str(k), with_offset=True) for k in vec_ids
    ]
    args += [
        VectorArg(dtype, "src" + str(k), with_offset=True) for k in vec_ids
    ]
    args.append(VectorArg(idx_dtype, "idx", with_offset=True))

    gathers = ["dest%d[i] = src%d[src_idx];" % (k, k) for k in vec_ids]
    index_load = "%(idx_tp)s src_idx = idx[i];\n" % ctx
    body = index_load + "\n".join(gathers)

    preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=preamble, name="take")
def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "put" (scatter) kernel.

    For each work index ``i`` the generated code reads
    ``dest_idx = gmem_dest_idx[i]`` and then, for every ``k`` in
    ``range(vec_count)``, performs ``dest<k>[dest_idx] = src<k>[i]``.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype*.
    :arg dtype: element type of the ``dest<k>`` and ``src<k>`` arguments.
    :arg idx_dtype: element type of ``gmem_dest_idx``.
    :arg vec_count: number of ``dest``/``src`` argument pairs.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"put"``.
    """
    # Only "idx_tp" is interpolated into the body text below.
    ctx = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Argument order: all dest arrays, the index array, then all src arrays.
    args = [
        VectorArg(dtype, "dest%d" % k, with_offset=True) for k in vec_ids
    ]
    args.append(VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True))
    args.extend(
        VectorArg(dtype, "src%d" % k, with_offset=True) for k in vec_ids)

    index_load = "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % ctx
    scatter_lines = [
        "dest%d[dest_idx] = src%d[i];" % (k, k) for k in vec_ids
    ]
    body = index_load + "\n".join(scatter_lines)

    preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=preamble, name="put")
def get_copy_kernel(context, dtype_dest, dtype_src):
    """Build the elementwise "copy" kernel ``dest[i] = <converted src[i]>``.

    The source expression starts as ``src[i]`` and is wrapped as follows,
    with ``<name>`` being ``complex_dtype_to_name(dtype_dest)``:

    * destination kind ``"c"`` and source kind not ``"c"``:
      ``<name>_fromreal(...)``;
    * destination kind ``"c"`` and ``dtype_src != dtype_dest``:
      ``<name>_cast(...)``, applied on top of any ``_fromreal`` wrapper.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype_dest*.
    :arg dtype_dest: element type of ``dest``.
    :arg dtype_src: element type of ``src``.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"copy"``.
    :raises TypeError: if the two dtypes differ and either has kind ``"V"``.
    """
    src = "src[i]"
    dest_is_complex = dtype_dest.kind == "c"

    if dest_is_complex and dtype_src.kind != "c":
        src = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_dest), src)

    if dest_is_complex and dtype_src != dtype_dest:
        # Keep this a plain string. A stray trailing comma here used to bind
        # a 1-tuple, which only formatted correctly below because "%"
        # unpacks tuples.
        src = "%s_cast(%s)" % (complex_dtype_to_name(dtype_dest), src)

    involves_struct = dtype_dest.kind == "V" or dtype_src.kind == "V"
    if dtype_dest != dtype_src and involves_struct:
        raise TypeError("copying between non-identical struct types")

    arguments = "%(tp_dest)s *dest, %(tp_src)s *src" % {
        "tp_dest": dtype_to_ctype(dtype_dest),
        "tp_src": dtype_to_ctype(dtype_src),
    }
    return get_elwise_kernel(
        context,
        arguments,
        "dest[i] = %s" % src,
        preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
        name="copy",
    )
def get_copy_kernel(context, dtype_dest, dtype_src):
    """Build the elementwise "copy" kernel ``dest[i] = <converted src[i]>``.

    The source expression starts as ``src[i]``. When the destination has
    kind ``"c"``, it is wrapped in ``<name>_fromreal(...)`` if the source
    kind is not ``"c"``, and then in ``<name>_cast(...)`` if the two dtypes
    differ; ``<name>`` is ``complex_dtype_to_name(dtype_dest)``.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype_dest*.
    :arg dtype_dest: element type of ``dest``.
    :arg dtype_src: element type of ``src``.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"copy"``.
    :raises TypeError: if the two dtypes differ and either has kind ``"V"``.
    """
    src = "src[i]"

    if dtype_dest.kind == "c":
        if dtype_src.kind != "c":
            src = "%s_fromreal(%s)" % (
                complex_dtype_to_name(dtype_dest), src)
        if dtype_src != dtype_dest:
            # Must stay a string: a trailing comma on this assignment would
            # turn it into a 1-tuple that only happens to format correctly.
            src = "%s_cast(%s)" % (complex_dtype_to_name(dtype_dest), src)

    if dtype_dest != dtype_src and "V" in (dtype_dest.kind, dtype_src.kind):
        raise TypeError("copying between non-identical struct types")

    return get_elwise_kernel(
        context,
        "%(tp_dest)s *dest, %(tp_src)s *src" % {
            "tp_dest": dtype_to_ctype(dtype_dest),
            "tp_src": dtype_to_ctype(dtype_src),
        },
        "dest[i] = %s" % src,
        preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
        name="copy")
def get_take_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "take" (gather) kernel.

    For each work index ``i`` the generated code reads ``src_idx = idx[i]``
    and then, for every ``k`` in ``range(vec_count)``, performs
    ``dest<k>[i] = src<k>[src_idx]``.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype*.
    :arg dtype: element type of the ``dest<k>`` and ``src<k>`` arguments.
    :arg idx_dtype: element type of ``idx``.
    :arg vec_count: number of ``dest``/``src`` argument pairs.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"take"``.
    """
    # Only "idx_tp" is interpolated into the body text below.
    ctx = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Argument order: all dest arrays, all src arrays, then the index array.
    dest_args = [
        VectorArg(dtype, "dest" + str(k), with_offset=True) for k in vec_ids
    ]
    src_args = [
        VectorArg(dtype, "src" + str(k), with_offset=True) for k in vec_ids
    ]
    index_args = [VectorArg(idx_dtype, "idx", with_offset=True)]
    args = dest_args + src_args + index_args

    index_load = "%(idx_tp)s src_idx = idx[i];\n" % ctx
    gather_lines = ["dest%d[i] = src%d[src_idx];" % (k, k) for k in vec_ids]
    body = index_load + "\n".join(gather_lines)

    preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=preamble, name="take")
def get_copy_kernel(context, dtype_dest, dtype_src):
    """Build the elementwise "copy" kernel ``dest[i] = <converted src[i]>``.

    The source expression starts as ``src[i]`` and is wrapped as follows,
    with ``<name>`` being ``complex_dtype_to_name(dtype_dest)``:

    * destination kind ``"c"`` and source kind not ``"c"``:
      ``<name>_fromreal(...)``;
    * destination kind ``"c"`` and ``dtype_src != dtype_dest``:
      ``<name>_cast(...)``, applied on top of any ``_fromreal`` wrapper.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype_dest*.
    :arg dtype_dest: element type of ``dest``.
    :arg dtype_src: element type of ``src``.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"copy"``.
    :raises TypeError: if the two dtypes differ and either has kind ``"V"``.
    """
    src = "src[i]"
    complex_dest = dtype_dest.kind == "c"
    dtypes_differ = dtype_dest != dtype_src

    if complex_dest and dtype_src.kind != "c":
        src = "{}_fromreal({})".format(
            complex_dtype_to_name(dtype_dest), src)

    if complex_dest and dtypes_differ:
        # Assign a plain string here. A trailing comma used to make this a
        # 1-tuple, which the "%" formatting below silently unpacked.
        src = "{}_cast({})".format(complex_dtype_to_name(dtype_dest), src)

    if dtypes_differ and (dtype_dest.kind == "V" or dtype_src.kind == "V"):
        raise TypeError("copying between non-identical struct types")

    arguments = "{tp_dest} *dest, {tp_src} *src".format(
        tp_dest=dtype_to_ctype(dtype_dest),
        tp_src=dtype_to_ctype(dtype_src),
    )
    return get_elwise_kernel(
        context,
        arguments,
        "dest[i] = %s" % src,
        preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
        name="copy")
def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
    """Build the elementwise "take_put" (gather-then-scatter) kernel.

    For each work index ``i`` the generated code reads
    ``src_idx = gmem_src_idx[i]`` and ``dest_idx = gmem_dest_idx[i]`` and
    then, for every ``k`` in ``range(vec_count)``, performs
    ``dest<k>[dest_idx] = src<k>[src_idx]``. When *with_offsets* is true the
    read is ``src<k>[src_idx+offset<k>]`` and one scalar ``offset<k>`` of
    *idx_dtype* is appended to the arguments per vector.

    :arg context: forwarded to ``get_elwise_kernel``; ``context.devices[0]``
        is used to build the preamble for *dtype*.
    :arg dtype: element type of the ``dest<k>`` and ``src<k>`` arguments.
    :arg idx_dtype: element type of both index arrays and of the offsets.
    :arg with_offsets: whether per-vector source offsets are used.
    :arg vec_count: number of ``dest``/``src`` argument pairs.
    :returns: the result of ``get_elwise_kernel`` for the kernel named
        ``"take_put"``.
    """
    # Only "idx_tp" is interpolated into the body text below.
    ctx = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Note: the dest arrays are declared without with_offset, unlike the
    # index and src arrays.
    dest_args = [VectorArg(dtype, "dest%d" % k) for k in vec_ids]
    index_args = [
        VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True),
        VectorArg(idx_dtype, "gmem_src_idx", with_offset=True),
    ]
    src_args = [
        VectorArg(dtype, "src%d" % k, with_offset=True) for k in vec_ids
    ]
    offset_args = []
    if with_offsets:
        offset_args = [
            ScalarArg(idx_dtype, "offset%d" % k) for k in vec_ids
        ]
    args = dest_args + index_args + src_args + offset_args

    def copy_stmt(k):
        # One assignment per vector; the offset form adds offset<k> to the
        # source index.
        if with_offsets:
            return "dest%d[dest_idx] = src%d[src_idx+offset%d];" % (k, k, k)
        return "dest%d[dest_idx] = src%d[src_idx];" % (k, k)

    index_loads = (
        "%(idx_tp)s src_idx = gmem_src_idx[i];\n"
        "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % ctx
    )
    body = index_loads + "\n".join(copy_stmt(k) for k in vec_ids)

    preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=preamble, name="take_put")