def norm(self, dtype1, out=None):
    """
    Register a ``norm`` function for ``dtype1`` and return its generated name.

    :param dtype1: dtype of the function's single argument.
    :param out: dtype of the result; when ``None``, ``dtype1`` is used.
    :returns: the name under which ``('norm', (out, dtype1))`` was stored
        in ``self.functions``.
    """
    if out is None:
        # NOTE(review): the result dtype defaults to the argument dtype as is;
        # confirm this is intended for complex arguments.
        out = dtype1

    # ``dtypes.ctype`` gives one string for one dtype, so it is used directly as the
    # signature. Passing it through '_'.join() would insert an underscore between
    # every character of the type name.
    arg_ctype = dtypes.ctype(dtype1).replace(' ', '_')
    out_ctype = dtypes.ctype(out).replace(' ', '_')

    name = "_{prefix}_norm__{out}__{signature}".format(
        prefix=self.prefix, out=out_ctype, signature=arg_ctype)

    self.functions[name] = ('norm', (out, dtype1))
    return name
def complex_exp(self, dtype1, out=None):
    """
    Register a ``complex_exp`` function for ``dtype1`` and return its generated name.

    :param dtype1: dtype of the function's single argument.
    :param out: dtype of the result; when ``None``, ``dtype1`` is used.
    :returns: the name under which ``('complex_exp', (out, dtype1))`` was stored
        in ``self.functions``.
    """
    if out is None:
        out = dtype1

    # There is a single argument, hence a single ctype string. Iterating over it
    # (as one would over a list of ctypes) walks its characters, which would then
    # be joined with underscores; use the sanitised string itself instead.
    arg_ctype = dtypes.ctype(dtype1).replace(' ', '_')
    out_ctype = dtypes.ctype(out).replace(' ', '_')

    name = "_{prefix}_complex_exp__{out}__{signature}".format(
        prefix=self.prefix, out=out_ctype, signature=arg_ctype)

    self.functions[name] = ('complex_exp', (out, dtype1))
    return name
def div(self, dtype1, dtype2, out=None):
    """
    Register a ``div`` function for the given argument dtypes and return its name.

    :param dtype1: dtype of the first argument.
    :param dtype2: dtype of the second argument.
    :param out: dtype of the result; when ``None``, it is taken from
        ``numpy.result_type(dtype1, dtype2)``.
    :returns: the name under which ``('div', (out, dtype1, dtype2))`` was stored
        in ``self.functions``.
    """
    result_dtype = numpy.result_type(dtype1, dtype2) if out is None else out

    # Spaces in C type names are replaced so that they can be embedded in an identifier.
    signature = '_'.join(
        [dtypes.ctype(arg_dtype).replace(' ', '_') for arg_dtype in (dtype1, dtype2)])
    result_ctype = dtypes.ctype(result_dtype).replace(' ', '_')

    name = "_{prefix}_div__{out}__{signature}".format(
        prefix=self.prefix, out=result_ctype, signature=signature)

    self.functions[name] = ('div', (result_dtype, dtype1, dtype2))
    return name
def test_dtype_support(ctx, dtype):
    """
    Compile and run a multiply-then-divide kernel for ``dtype``.

    The test is skipped when the context reports that it does not support ``dtype``;
    otherwise the kernel must compile and ``(a * b) / b`` must reproduce ``a``.
    """
    if not ctx.supports_dtype(dtype):
        pytest.skip()

    size = 256

    module = ctx.compile(
        """
        KERNEL void test(
            GLOBAL_MEM ${ctype} *dest, GLOBAL_MEM ${ctype} *a, GLOBAL_MEM ${ctype} *b)
        {
          const int i = get_global_id(0);
          ${ctype} temp = ${func.mul(dtype, dtype)}(a[i], b[i]);
          dest[i] = ${func.div(dtype, dtype)}(temp, b[i]);
        }
        """,
        render_kwds={'ctype': dtypes.ctype(dtype), 'dtype': dtype})
    kernel = module.test

    # Values are kept below 8 so that the product fits even in unsigned char.
    a = get_test_array(size, dtype, high=8)
    b = get_test_array(size, dtype, no_zeros=True, high=8)

    a_dev = ctx.to_device(a)
    b_dev = ctx.to_device(b)
    dest_dev = ctx.empty_like(a_dev)

    kernel(dest_dev, a_dev, b_dev, global_size=size)

    result = ctx.from_device(dest_dev)
    assert diff_is_negligible(result, a)
def test_dtype_support(ctx, dtype):
    """
    Verify that ``dtype`` is usable in a kernel whenever the context supports it.

    Passes if the context reports the dtype as unsupported (the test is skipped),
    or if a kernel operating on this dtype compiles and computes ``(a * b) / b == a``.
    """
    elements = 256

    supported = ctx.supports_dtype(dtype)
    if not supported:
        pytest.skip()

    kernel_template = """
        KERNEL void test(
            GLOBAL_MEM ${ctype} *dest, GLOBAL_MEM ${ctype} *a, GLOBAL_MEM ${ctype} *b)
        {
          const int i = get_global_id(0);
          ${ctype} temp = ${func.mul(dtype, dtype)}(a[i], b[i]);
          dest[i] = ${func.div(dtype, dtype)}(temp, b[i]);
        }
        """
    module = ctx.compile(
        kernel_template,
        render_kwds=dict(ctype=dtypes.ctype(dtype), dtype=dtype))
    run_kernel = module.test

    # The operands stay small (high=8) so the intermediate product fits
    # even when the dtype is unsigned char.
    a = get_test_array(elements, dtype, high=8)
    b = get_test_array(elements, dtype, no_zeros=True, high=8)

    a_dev = ctx.to_device(a)
    b_dev = ctx.to_device(b)
    dest_dev = ctx.empty_like(a_dev)

    run_kernel(dest_dev, a_dev, b_dev, global_size=elements)
    assert diff_is_negligible(ctx.from_device(dest_dev), a)
def build_arglist(argnames):
    """
    Build a comma-separated C argument list for the given node names.

    Array values are declared as ``GLOBAL_MEM <ctype> *<name>``, everything else
    as a plain ``<ctype> <name>``; the name is produced by ``leaf_name()``.
    """
    def declaration(argname):
        # Regular nodes are consulted first; any other name is looked up
        # among the temporary nodes.
        registry = self.nodes if argname in self.nodes else self.temp_nodes
        value = registry[argname].value
        ctype = dtypes.ctype(value.dtype)

        if value.is_array:
            qualifier, separator = "GLOBAL_MEM ", " *"
        else:
            qualifier, separator = " ", " "
        return qualifier + ctype + separator + leaf_name(argname)

    return ", ".join([declaration(argname) for argname in argnames])
def __init__(self, nodes, kind, number, name, node_type):
    """
    Describe one argument of a transformation.

    :param nodes: mapping from node name to node; the dtype is read from
        ``nodes[name].value.dtype``.
    :param kind: argument kind; ``'i'`` gets a ``load`` attribute and ``'o'`` gets
        a ``store`` attribute, other kinds get neither.
    :param number: zero-based position; the label uses ``number + 1``.
    :param name: name of the node this argument refers to.
    :param node_type: compared against ``NODE_INPUT`` to pick the load/store code.
    """
    self.label = kind + str(number + 1)
    self._name = name
    self.dtype = nodes[name].value.dtype
    self.ctype = dtypes.ctype(self.dtype)

    for_input_node = node_type == NODE_INPUT

    if kind == 'i':
        # For an input node the value is loaded through a macro;
        # otherwise it is available as ``val``.
        self.load = load_macro_call_tr(self._name) if for_input_node else "val"
    elif kind == 'o':
        # For an input node the result is returned;
        # otherwise it is stored through a macro.
        self.store = "return" if for_input_node else store_macro_call(self._name)
def __init__(self, name, dtype):
    """
    Store the name and dtype of a value together with its C type
    and its load/store macro calls.

    :param name: name passed to ``load_macro_call()`` and ``store_macro_call()``.
    :param dtype: dtype of the value; its C type is obtained via ``dtypes.ctype()``.
    """
    self._name = name
    self.dtype = dtype
    self.ctype = dtypes.ctype(dtype)

    # Both accessors are always available, regardless of how the value is used.
    self.load = load_macro_call(name)
    self.store = store_macro_call(name)
def cast(self, out_dtype, in_dtype):
    """
    Register a ``cast`` function from ``in_dtype`` to ``out_dtype`` and return its name.

    :param out_dtype: dtype of the result.
    :param in_dtype: dtype of the argument.
    :returns: the name under which ``('cast', (out_dtype, in_dtype))`` was stored
        in ``self.functions``.
    """
    # C type names may contain spaces (e.g. "unsigned char"); replace them so that
    # the generated name remains a single valid identifier.
    out_ctype = dtypes.ctype(out_dtype).replace(' ', '_')
    in_ctype = dtypes.ctype(in_dtype).replace(' ', '_')

    name = "_{prefix}_cast_{out}_{in_}".format(
        prefix=self.prefix, out=out_ctype, in_=in_ctype)

    self.functions[name] = ('cast', (out_dtype, in_dtype))
    return name
def process(name):
    """
    Append the code for node ``name`` (and, recursively, its children) to ``code_list``.

    Each node is handled at most once (tracked in ``visited``). Nodes that are
    neither ``NODE_INPUT`` nor ``NODE_OUTPUT`` are ignored. A node without children
    contributes only a leaf macro; any other node contributes a function definition
    rendered from its transformation code, followed by the node macro.
    """
    if name in visited:
        return
    visited.add(name)

    node = self.nodes[name]

    if node.type == NODE_INPUT:
        loading = True
    elif node.type == NODE_OUTPUT:
        loading = False
    else:
        return

    # Base nodes use a separate set of macros.
    is_base = name in self.base_names
    if loading:
        leaf_macro, node_macro = (
            (base_leaf_load_macro, base_node_load_macro) if is_base
            else (leaf_load_macro, node_load_macro))
    else:
        leaf_macro, node_macro = (
            (base_leaf_store_macro, base_node_store_macro) if is_base
            else (leaf_store_macro, node_store_macro))

    if node.children is None:
        code_list.append("// leaf node " + node.name + "\n" + leaf_macro(node.name))
        return

    # Children go first, so their code precedes this node's code in ``code_list``.
    for child in node.children:
        process(child)

    all_children = self.all_children(node.name)
    tr = node.tr_to_children
    args = {}

    def add_args(kind, arg_names):
        # Creates one argument per name and files it under its label.
        for number, arg_name in enumerate(arg_names):
            arg = TransformationArgument(self.nodes, kind, number, arg_name, node.type)
            args[arg.label] = arg

    if loading:
        definition = "INLINE WITHIN_KERNEL {outtype} {fname}({arglist}, int idx)".format(
            outtype=dtypes.ctype(node.value.dtype),
            fname=load_function_name(node.name),
            arglist=build_arglist(all_children))

        # The first ``tr.inputs`` children are inputs, the remaining ones are scalars.
        input_names = node.children[:tr.inputs]
        scalar_names = node.children[tr.inputs:]

        add_args('i', input_names)
        add_args('s', scalar_names)
        # The node itself is the single output of the transformation.
        add_args('o', [node.name])
    else:
        definition = "INLINE WITHIN_KERNEL void {fname}({arglist}, int idx, {intype} val)".format(
            intype=dtypes.ctype(node.value.dtype),
            fname=store_function_name(node.name),
            arglist=build_arglist(all_children))

        # The first ``tr.outputs`` children are outputs, the remaining ones are scalars.
        output_names = node.children[:tr.outputs]
        scalar_names = node.children[tr.outputs:]

        add_args('o', output_names)
        add_args('s', scalar_names)
        # The node itself is the single input of the transformation.
        add_args('i', [node.name])

    code_src = render_without_funcs(tr.code, func_c, **args)

    code_list.append(
        "// node " + node.name + "\n" +
        definition + "\n{\n" + code_src + "\n}\n" +
        node_macro(node.name, all_children))