def predicate_sum(dtype):
    """
    Build a :py:class:`~reikna.algorithms.Predicate` object that adds its two arguments.

    The second ``Predicate`` argument is a zero scalar of the given ``dtype``.
    """
    addition = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    zero = numpy.zeros(1, dtype)[0]
    return Predicate(addition, zero)
def _build_plan(
        self, plan_factory, device_params,
        ks_a, ks_b, ks_cv, in_key, out_key, noises_a, noises_b):
    """
    Assemble the computation plan that fills the keyswitch key arrays.

    The plan consists of two calls: a ``MatrixMulVector`` computation writing
    into a temporary ``b_term`` array from ``noises_a`` and ``out_key``,
    followed by a ``PureParallel`` kernel (template def
    ``make_lwe_keyswitch_key``) that writes ``ks_a``, ``ks_b`` and ``ks_cv``.
    """
    plan = plan_factory()

    # The sizes themselves are not used below, but the unpacking
    # requires ``ks_a`` to be four-dimensional.
    extracted_n, t, base, inner_n = ks_a.shape

    mul_key = MatrixMulVector(noises_a)
    b_term = plan.temp_array_like(mul_key.parameter.output)

    # (name, array-like, role) for every kernel argument, in signature order.
    argument_specs = (
        ('ks_a', ks_a, 'o'),
        ('ks_b', ks_b, 'o'),
        ('ks_cv', ks_cv, 'o'),
        ('in_key', in_key, 'i'),
        ('b_term', b_term, 'i'),
        ('noises_a', noises_a, 'i'),
        ('noises_b', noises_b, 'i'),
    )
    kernel_parameters = [
        Parameter(name, Annotation(array, role))
        for name, array, role in argument_specs]

    kernel_snippet = Snippet(
        TEMPLATE.get_def("make_lwe_keyswitch_key"),
        render_kwds={
            'log2_base': self._log2_base,
            'output_size': self._output_size,
            'noise': self._noise,
        })

    build_keyswitch = PureParallel(
        kernel_parameters, kernel_snippet, guiding_array="ks_b")

    plan.computation_call(mul_key, b_term, noises_a, out_key)
    plan.computation_call(
        build_keyswitch,
        ks_a, ks_b, ks_cv, in_key, b_term, noises_a, noises_b)

    return plan
def test_structure_type(thr):
    """
    Reduce a 2D array of aligned nested-struct elements along axis 0
    and compare against a per-field numpy sum.
    """
    shape = (100, 100)
    nested_dtype = numpy.dtype([
        ('v', numpy.uint64),
        ])
    struct_dtype = numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', nested_dtype),
        ('i2', numpy.uint32)])
    dtype = dtypes.align(struct_dtype)

    a = get_test_array(shape, dtype)
    a_dev = thr.to_device(a)

    # numpy cannot reduce arrays with struct dtypes,
    # so the reference is assembled field by field.
    b_ref = numpy.empty(100, dtype)
    b_ref['i1'] = a['i1'].sum(0)
    b_ref['nested']['v'] = a['nested']['v'].sum(0)
    b_ref['i2'] = a['i2'].sum(0)

    struct_ctype = dtypes.ctype_module(dtype)
    field_sum = Snippet.create(
        lambda v1, v2: """ ${ctype} result = ${v1}; result.i1 += ${v2}.i1; result.nested.v += ${v2}.nested.v; result.i2 += ${v2}.i2; return result; """,
        render_kwds=dict(ctype=struct_ctype))
    predicate = Predicate(field_sum, numpy.zeros(1, dtype)[0])

    rd = Reduce(a_dev, predicate, axes=(0,))
    b_dev = thr.empty_like(rd.parameter.output)

    compiled = rd.compile(thr)
    compiled(b_dev, a_dev)
    b_res = b_dev.get()

    # Array.get() runs numpy.lib.stride_tricks.as_strided() on the array,
    # which adds dummy variables instead of custom offsets (and our `dtype` has them),
    # so the dtype of the result differs from the one of the device array.
    # We record whether that happened and convert the result back
    # to the original dtype (the two are still compatible).
    # Once the behavior changes, this test starts failing, which is the signal
    # to drop the conversion.
    # See inducer/compyte issue #26.
    dtype_corrupted = b_res.dtype != b_dev.dtype
    b_res = b_res.astype(dtype)

    assert diff_is_negligible(b_res, b_ref)

    if not dtype_corrupted:
        pytest.fail(
            "as_strided() does not corrupt the datatype anymore, "
            "we can remove the `astype()` now")
    pytest.xfail("as_strided() still corrupts the datatype")
def test_nondefault_function(thr):
    """
    Reduce an int64 array along axis 0 with an explicitly constructed
    addition predicate and compare against ``numpy``'s ``sum(0)``.
    """
    host_input = get_test_array((100, 100), numpy.int64)
    a_dev = thr.to_device(host_input)

    addition = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    predicate = Predicate(addition, 0)

    reduction = Reduce(a_dev, predicate, axes=(0,))
    b_dev = thr.empty_like(reduction.parameter.output)

    compiled = reduction.compile(thr)
    compiled(b_dev, a_dev)

    expected = host_input.sum(0)
    assert diff_is_negligible(b_dev.get(), expected)
def test_nondefault_function(thr):
    """
    Check ``Reduce`` with a hand-written addition predicate:
    the axis-0 reduction of an int64 array must match ``a.sum(0)``.
    """
    source = get_test_array((100, 100), numpy.int64)
    source_dev = thr.to_device(source)
    reference = source.sum(0)

    predicate = Predicate(
        Snippet.create(lambda v1, v2: "return ${v1} + ${v2};"),
        0)

    rd = Reduce(source_dev, predicate, axes=(0,))
    result_dev = thr.empty_like(rd.parameter.output)
    rd.compile(thr)(result_dev, source_dev)

    assert diff_is_negligible(result_dev.get(), reference)
def test_structure_type(thr):
    """
    Reduce a 2D array of aligned nested-struct elements along axis 0
    and compare against a reference built field by field with numpy.
    """
    shape = (100, 100)
    inner_dtype = numpy.dtype([
        ('v', numpy.uint64),
        ])
    dtype = dtypes.align(numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', inner_dtype),
        ('i2', numpy.uint32)
        ]))

    a = get_test_array(shape, dtype)
    a_dev = thr.to_device(a)

    # numpy cannot reduce arrays with struct dtypes,
    # so every field of the reference is summed separately.
    b_ref = numpy.empty(100, dtype)
    b_ref['i1'] = a['i1'].sum(0)
    b_ref['nested']['v'] = a['nested']['v'].sum(0)
    b_ref['i2'] = a['i2'].sum(0)

    struct_ctype = dtypes.ctype_module(dtype)
    field_sum = Snippet.create(
        lambda v1, v2: """ ${ctype} result = ${v1}; result.i1 += ${v2}.i1; result.nested.v += ${v2}.nested.v; result.i2 += ${v2}.i2; return result; """,
        render_kwds=dict(ctype=struct_ctype))
    predicate = Predicate(field_sum, numpy.zeros(1, dtype)[0])

    rd = Reduce(a_dev, predicate, axes=(0,))
    b_dev = thr.empty_like(rd.parameter.output)

    compiled = rd.compile(thr)
    compiled(b_dev, a_dev)

    b_res = b_dev.get()
    assert diff_is_negligible(b_res, b_ref)
def test_scan_structure_type(thr, exclusive):
    """
    Scan a 2D array of aligned nested-struct elements along axis 0
    and compare against ``ref_scan`` applied to each field separately.
    """
    shape = (100, 100)
    inner_dtype = numpy.dtype([
        ('v', numpy.uint64),
        ])
    dtype = dtypes.align(numpy.dtype([
        ('i1', numpy.uint32),
        ('nested', inner_dtype),
        ('i2', numpy.uint32)]))

    a = get_test_array(shape, dtype)
    a_dev = thr.to_device(a)

    # numpy cannot scan arrays with struct dtypes,
    # so the reference is assembled one field at a time.
    def scan_field(field):
        return ref_scan(field, axes=0, exclusive=exclusive)

    b_ref = numpy.empty(shape, dtype)
    b_ref['i1'] = scan_field(a['i1'])
    b_ref['nested']['v'] = scan_field(a['nested']['v'])
    b_ref['i2'] = scan_field(a['i2'])

    struct_ctype = dtypes.ctype_module(dtype)
    field_sum = Snippet.create(
        lambda v1, v2: """ ${ctype} result = ${v1}; result.i1 += ${v2}.i1; result.nested.v += ${v2}.nested.v; result.i2 += ${v2}.i2; return result; """,
        render_kwds=dict(ctype=struct_ctype))
    predicate = Predicate(field_sum, numpy.zeros(1, dtype)[0])

    scan = Scan(a_dev, predicate, axes=(0,), exclusive=exclusive)
    b_dev = thr.empty_like(scan.parameter.output)

    compiled = scan.compile(thr)
    compiled(b_dev, a_dev)

    b_res = b_dev.get()
    assert diff_is_negligible(b_res, b_ref)
def __init__(self, parameters, code, guiding_array=None, render_kwds=None):
    """
    Store the kernel snippet and the guiding shape for this computation.

    :param parameters: passed unchanged to ``Computation.__init__``.
    :param code: either a ready ``Snippet`` (used as is; ``render_kwds`` is
        not applied to it), or template source which is wrapped into
        a template def taking ``idxs`` followed by the parameter names.
    :param guiding_array: ``None`` selects the first parameter;
        a string names the parameter whose shape is used;
        any other value is stored directly as the guiding shape.
    :param render_kwds: render keywords for the snippet built from ``code``.
    """
    Computation.__init__(self, parameters)

    names = list(self.signature.parameters.keys())
    self._root_parameters = names

    if not isinstance(code, Snippet):
        template = helpers.template_def(['idxs'] + names, code)
        code = Snippet(template, render_kwds=render_kwds)
    self._snippet = code

    if guiding_array is None:
        guiding_array = names[0]

    if isinstance(guiding_array, str):
        # A parameter name was given: take the shape from its annotation.
        guiding_param = self.signature.parameters[guiding_array]
        self._guiding_shape = guiding_param.annotation.type.shape
    else:
        self._guiding_shape = guiding_array
def __init__(self, parameters, code, render_kwds=None, connectors=None):
    """
    Build the signature, per-parameter attributes, connector list and snippet.

    :param parameters: iterable of parameter objects; a parameter whose
        annotation is both input and output is rejected.
    :param code: template source, wrapped into a template def taking
        ``idxs`` followed by the signature's parameter names.
    :param render_kwds: render keywords passed to the ``Snippet``.
    :param connectors: explicit connector names; when ``None``, the names
        of all parameters whose annotation is an array are used.
    :raises ValueError: if any parameter is annotated as both input and output.
    """
    for candidate in parameters:
        annotation = candidate.annotation
        if annotation.input and annotation.output:
            raise ValueError(
                "Transformation cannot have 'io' parameters ('" + candidate.name + "')")

    self.signature = Signature(parameters)

    # Expose every signature parameter as an attribute of the same name.
    signature_names = []
    for sig_param in self.signature.parameters.values():
        proxy = TransformationParameter(
            self, sig_param.name, sig_param.annotation.type)
        setattr(self, sig_param.name, proxy)
        signature_names.append(sig_param.name)

    if connectors is None:
        connectors = [p.name for p in parameters if p.annotation.array]
    self.connectors = connectors

    template = template_def(['idxs'] + signature_names, code)
    self.snippet = Snippet(template, render_kwds=render_kwds)
def _build_plan(
        self, plan_factory, device_params,
        ks_a, ks_b, ks_cv, in_key, out_key, noises_a, noises_b):
    """
    Assemble the computation plan that fills the keyswitch key arrays.

    Three calls are scheduled, in this order:

    1. a ``Reduce`` of ``noises_b`` with a sum predicate, whose output goes
       through a ``div_const`` transformation with ``numpy.prod(noises_b.shape)``
       (presumably yielding the mean of ``noises_b`` -- confirm against
       ``transformations.div_const``);
    2. a ``MatrixMulVector`` computation writing into a temporary ``b_term``;
    3. a ``PureParallel`` kernel (template def ``make_lwe_keyswitch_key``)
       that writes ``ks_a``, ``ks_b`` and ``ks_cv``.
    """
    plan = plan_factory()

    # The sizes themselves are not used below, but the unpacking
    # requires ``ks_a`` to be four-dimensional.
    extracted_n, t, base, inner_n = ks_a.shape

    mean = Reduce(noises_b, predicate_sum(noises_b.dtype))
    element_count = numpy.prod(noises_b.shape)
    norm = transformations.div_const(mean.parameter.output, element_count)
    # After the connection the computation's output parameter is named ``mean``.
    mean.parameter.output.connect(norm, norm.input, mean=norm.output)
    noises_b_mean = plan.temp_array_like(mean.parameter.mean)

    mul_key = MatrixMulVector(noises_a)
    b_term = plan.temp_array_like(mul_key.parameter.output)

    # (name, array-like, role) for every kernel argument, in signature order.
    argument_specs = (
        ('ks_a', ks_a, 'o'),
        ('ks_b', ks_b, 'o'),
        ('ks_cv', ks_cv, 'o'),
        ('in_key', in_key, 'i'),
        ('b_term', b_term, 'i'),
        ('noises_a', noises_a, 'i'),
        ('noises_b', noises_b, 'i'),
        ('noises_b_mean', noises_b_mean, 'i'),
    )
    kernel_parameters = [
        Parameter(name, Annotation(array, role))
        for name, array, role in argument_specs]

    kernel_snippet = Snippet(
        TEMPLATE.get_def("make_lwe_keyswitch_key"),
        render_kwds={
            'log2_base': self._log2_base,
            'output_size': self._output_size,
            'double_to_t32': double_to_t32_module,
            'noise': self._noise,
        })

    build_keyswitch = PureParallel(
        kernel_parameters, kernel_snippet, guiding_array="ks_b")

    plan.computation_call(mean, noises_b_mean, noises_b)
    plan.computation_call(mul_key, b_term, noises_a, out_key)
    plan.computation_call(
        build_keyswitch,
        ks_a, ks_b, ks_cv, in_key, b_term, noises_a, noises_b, noises_b_mean)

    return plan
def combinator_call(dtype, m1num=1, m2num=1, snum=1):
    """
    Return a ``Snippet`` built from the ``snippet`` def of ``TEMPLATE``.

    The result of ``combinator(dtype, m1num=..., m2num=..., snum=...)``
    is made available to the template under the name ``c``.
    """
    combinator_obj = combinator(dtype, m1num=m1num, m2num=m2num, snum=snum)
    snippet_def = TEMPLATE.get_def('snippet')
    return Snippet(snippet_def, render_kwds={'c': combinator_obj})
mmc_c_decl = dtypes.ctype_module(mmc_dtype) # Create the "empty" element for our minmax monoid, that is # x `minmax` empty == empty `minmax` x == x. empty = numpy.empty(1, mmc_dtype)[0] empty["cur_min"] = 1 << 30 empty["cur_max"] = -(1 << 30) # Reduction predicate for the minmax. # v1 and v2 get the names of two variables to be processed. predicate = Predicate( Snippet.create(lambda v1, v2: """ ${ctype} result = ${v1}; if (${v2}.cur_min < result.cur_min) result.cur_min = ${v2}.cur_min; if (${v2}.cur_max > result.cur_max) result.cur_max = ${v2}.cur_max; return result; """, render_kwds=dict(ctype=mmc_c_decl)), empty) # Test array arr = numpy.random.randint(0, 10**6, 20000) # A transformation that creates initial minmax structures for the given array of integers to_mmc = Transformation([ Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')), Parameter('input', Annotation(arr, 'i')) ], """ ${output.ctype} res; res.cur_min = ${input.load_same};
def test_scan_custom_predicate(thr):
    """
    Run ``check_scan`` over axes 1 and 2 of a 4D shape
    with an explicitly constructed addition predicate.
    """
    addition = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    predicate = Predicate(addition, 0)
    check_scan(thr, (10, 20, 30, 40), axes=(1, 2), predicate=predicate)
# General utilities for OpenCL arrays
from .gpu import *
from reikna.core import Type
from reikna.cluda import Snippet
from reikna.algorithms.reduce import Reduce, Predicate

prg = build_program(["maximum_filter", "taxicab_distance"])


def maximum_filter_kernel(img):
    """
    Run the ``maximum_filter`` kernel of ``prg`` on ``img``.

    The output array has the same type as ``img`` and is zero-filled before
    the kernel is launched. The global size is the shape of ``img``
    in reversed axis order; the local size is ``(1, 1)``.
    """
    filtered = thr.empty_like(img)
    filtered.fill(0)
    launch_size = map(int, img.shape[::-1])
    prg.maximum_filter(img, filtered, global_size=launch_size, local_size=(1, 1))
    return filtered


# Binary operation for the reduction below: the larger of the two arguments.
max_snippet = Snippet.create(lambda a, b: """ return ((${a}) > (${b})) ? (${a}) : (${b}); """)


def max_kernel(arr):
    """
    Reduce ``arr`` with ``max_snippet`` and return the result as a Python scalar.

    NOTE(review): the second ``Predicate`` argument is the float32 value -1e9;
    presumably inputs lying entirely below it would give -1e9 -- confirm.
    """
    lowest = np.array([-10.0**9], np.float32)[0]
    reduction = Reduce(arr, Predicate(max_snippet, lowest))
    max_func = reduction.compile(thr)

    out = thr.empty_like(Type(np.float32))
    max_func(out, arr)
    return out.get().item()
VALUE_NAME=VALUE_NAME, module_idx=module_idx, nq_indices=index_cnames_seq(param), nq_params=param_cnames_seq(subtree_parameters))) _snippet_disassemble_combined = Snippet.create( lambda shape, slices, indices, combined_indices: """ %for combined_index, slice_len in enumerate(slices): <% index_start = sum(slices[:combined_index]) index_end = index_start + slice_len %> %for index in range(index_start, index_end): <% stride = product(shape[index+1:index_end]) %> VSIZE_T ${indices[index]} = ${combined_indices[combined_index]} / ${stride}; %if index != index_end - 1: ${combined_indices[combined_index]} -= ${indices[index]} * ${stride}; %endif %endfor %endfor """, render_kwds=dict(product=helpers.product)) _module_combined = helpers.template_def( ['prefix', 'slices'], """ <% value_param = [str(connector_ctype) + ' ' + VALUE_NAME] if output else []
def test_scan_custom_predicate(thr):
    """
    Check scanning with a user-supplied predicate: ``check_scan`` is run
    on a ``(10, 20, 30, 40)`` shape over axes 1 and 2.
    """
    add_snippet = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    custom_predicate = Predicate(add_snippet, 0)
    check_scan(
        thr, (10, 20, 30, 40),
        axes=(1, 2),
        predicate=custom_predicate)
from .gpu import *
from reikna.core import Type
from reikna.cluda import Snippet
from reikna.algorithms.reduce import Reduce, Predicate

prg = build_program(["maximum_filter", "taxicab_distance"])


def maximum_filter_kernel(img):
    """
    Launch the ``maximum_filter`` kernel of ``prg`` over ``img``.

    A zero-filled array of the same type as ``img`` receives the output.
    The global size is the reversed shape of ``img``; the local size is ``(1, 1)``.
    """
    result = thr.empty_like(img)
    result.fill(0)
    reversed_shape = img.shape[::-1]
    prg.maximum_filter(
        img, result,
        global_size=map(int, reversed_shape),
        local_size=(1, 1))
    return result


# Binary operation for the reduction below: the larger of the two arguments.
max_snippet = Snippet.create(lambda a, b: """ return ((${a}) > (${b})) ? (${a}) : (${b}); """)


def max_kernel(arr):
    """
    Reduce ``arr`` with ``max_snippet`` and return the result as a Python scalar.

    NOTE(review): the second ``Predicate`` argument is the float32 value -1e9;
    presumably inputs lying entirely below it would give -1e9 -- confirm.
    """
    floor_value = np.array([-10.0**9], np.float32)[0]
    max_predicate = Predicate(max_snippet, floor_value)
    max_func = Reduce(arr, max_predicate).compile(thr)

    out = thr.empty_like(Type(np.float32))
    max_func(out, arr)
    return out.get().item()
# Create the "empty" element for our minmax monoid, that is # x `minmax` empty == empty `minmax` x == x. empty = numpy.empty(1, mmc_dtype)[0] empty["cur_min"] = 1 << 30 empty["cur_max"] = -(1 << 30) # Reduction predicate for the minmax. # v1 and v2 get the names of two variables to be processed. predicate = Predicate( Snippet.create(lambda v1, v2: """ ${ctype} result = ${v1}; if (${v2}.cur_min < result.cur_min) result.cur_min = ${v2}.cur_min; if (${v2}.cur_max > result.cur_max) result.cur_max = ${v2}.cur_max; return result; """, render_kwds=dict(ctype=mmc_c_decl)), empty) # Test array arr = numpy.random.randint(0, 10**6, 20000) # A transformation that creates initial minmax structures for the given array of integers to_mmc = Transformation( [Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')), Parameter('input', Annotation(arr, 'i'))],
VALUE_NAME=VALUE_NAME, module_idx=module_idx, nq_indices=index_cnames_seq(param), nq_params=param_cnames_seq(subtree_parameters))) _snippet_disassemble_combined = Snippet.create( lambda shape, slices, indices, combined_indices: """ %for combined_index, slice_len in enumerate(slices): <% index_start = sum(slices[:combined_index]) index_end = index_start + slice_len %> %for index in range(index_start, index_end): <% stride = product(shape[index+1:index_end]) %> VSIZE_T ${indices[index]} = ${combined_indices[combined_index]} / ${stride}; %if index != index_end - 1: ${combined_indices[combined_index]} -= ${indices[index]} * ${stride}; %endif %endfor %endfor """, render_kwds=dict(product=helpers.product)) _module_combined = helpers.template_def(['prefix', 'slices'], """ <% value_param = [str(connector_ctype) + ' ' + VALUE_NAME] if output else [] value = [VALUE_NAME] if output else []
# Create LUT and stringify into preamble of map kernel.
# LUT[v] is the number of set bits in the byte value v: for each of the
# 8 bit positions, every value with that bit set gets its count incremented.
LUT = np.zeros(256, np.int32)
# `range` instead of the Python-2-only `xrange`; for 8 iterations
# the behavior is the same on both Python versions.
for b in range(8):
    LUT[(np.arange(256) & (1 << b)) != 0] += 1
strLUT = "constant int LUT[256] = {" + ",".join(map(str, LUT)) + "};\n"

# Transformation mapping an input byte to its bit count via the LUT.
byte_to_count = Transformation([
    Parameter('output', Annotation(Type(np.int32, (1, )), 'o')),
    Parameter('input', Annotation(Type(np.uint8, (1, )), 'i'))
    ],
    strLUT + """ ${output.store_same}(LUT[${input.load_same}]); """)

# Addition predicate with 0 as the empty element.
# The snippet is rendered as a C statement, so the `return` has to be
# terminated with a semicolon.
predicate = Predicate(
    Snippet.create(lambda v1, v2: "return ${v1} + ${v2};"),
    np.int32(0))

# Reduce the bit counts, with the byte-to-count transformation attached
# to the reduction input; the new input parameter is called `new_input`.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(
    byte_to_count, byte_to_count.output, new_input=byte_to_count.input)

sum_bits = sum_bits_reduction.compile(thr)

#sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#                    reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#                    arguments="__global unsigned char *bytes",
#                    preamble=strLUT)
#def count_bits(img):
#    return sum_byte_count(img).get().item()
#
#pixel_inds = GenericScanKernel(cx, np.int32,
#                    arguments="__global unsigned char *bytes, "
def kernel_declaration(kernel_name, parameters):
    """
    Return a ``Snippet`` wrapping ``_snippet_kernel_declaration``.

    The template receives ``kernel_name``, ``parameters`` and the
    ``param_cnames_seq`` helper as render keywords.
    """
    render_kwds = {
        'param_cnames_seq': param_cnames_seq,
        'kernel_name': kernel_name,
        'parameters': parameters,
    }
    return Snippet(_snippet_kernel_declaration, render_kwds=render_kwds)
def predicate_sum(dtype):
    """
    Create a :py:class:`~reikna.algorithms.Predicate` object whose operation
    is the sum of its two arguments.

    A zero scalar of the given ``dtype`` is passed as the second
    ``Predicate`` argument.
    """
    zero_element = numpy.zeros(1, dtype)[0]
    sum_snippet = Snippet.create(lambda v1, v2: "return ${v1} + ${v2};")
    return Predicate(sum_snippet, zero_element)
# LUT[v] is the number of set bits in the byte value v: for each of the
# 8 bit positions, every value with that bit set gets its count incremented.
LUT = np.zeros(256, np.int32)
# `range` instead of the Python-2-only `xrange`; for 8 iterations
# the behavior is the same on both Python versions.
for b in range(8):
    LUT[(np.arange(256) & (1 << b)) != 0] += 1
strLUT = "constant int LUT[256] = {" + ",".join(map(str, LUT)) + "};\n"

# Transformation mapping an input byte to its bit count via the LUT.
byte_to_count = Transformation(
    [
        Parameter("output", Annotation(Type(np.int32, (1,)), "o")),
        Parameter("input", Annotation(Type(np.uint8, (1,)), "i")),
    ],
    strLUT
    + """ ${output.store_same}(LUT[${input.load_same}]); """,
)

# Addition predicate with 0 as the empty element.
# The snippet is rendered as a C statement, so the `return` has to be
# terminated with a semicolon.
predicate = Predicate(Snippet.create(lambda v1, v2: "return ${v1} + ${v2};"), np.int32(0))

# Reduce the bit counts, with the byte-to-count transformation attached
# to the reduction input; the new input parameter is called `new_input`.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(
    byte_to_count, byte_to_count.output, new_input=byte_to_count.input)

sum_bits = sum_bits_reduction.compile(thr)

# sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#                    reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#                    arguments="__global unsigned char *bytes",
#                    preamble=strLUT)
# def count_bits(img):
#    return sum_byte_count(img).get().item()
#
# pixel_inds = GenericScanKernel(cx, np.int32,
#                    arguments="__global unsigned char *bytes, "
#                              "int image_w, "
#                              "__global int *pixels",