def fftshift(arr_t, axes=None):
    """
    Returns a frequency shift transformation (1 output, 1 input) that
    works as ``output = numpy.fft.fftshift(input, axes=axes)``.

    :param arr_t: array type used for both the input and the output.
    :param axes: an iterable of axes to shift over; ``None`` means all axes.
        Negative values count from the last axis, as in ``numpy``.

    .. warning::

        Involves repositioning of the elements, so cannot be used on inplace kernels.
    """
    ndim = len(arr_t.shape)
    if axes is None:
        axes = tuple(range(ndim))
    else:
        # The template tests ``dim in axes`` with non-negative ``dim`` only,
        # so negative axes must be normalized or they would be silently ignored.
        axes = tuple(sorted(axis + ndim if axis < 0 else axis for axis in axes))

    # The code taken from the FFTShift template for odd problem sizes
    # (at the moment of the writing).
    # Note the use of ``idxs`` template parameter to get access to element indices.
    return Transformation(
        [
            Parameter('output', Annotation(arr_t, 'o')),
            Parameter('input', Annotation(arr_t, 'i')),
        ],
        """
        <%
            dimensions = len(output.shape)
            new_idx_names = ['new_idx' + str(i) for i in range(dimensions)]
        %>
        %for dim in range(dimensions):
        VSIZE_T ${new_idx_names[dim]} =
            ${idxs[dim]}
            %if dim in axes:
                %if output.shape[dim] % 2 == 0:
                + (${idxs[dim]} < ${output.shape[dim] // 2} ?
                    ${output.shape[dim] // 2} :
                    ${-output.shape[dim] // 2})
                %else:
                + (${idxs[dim]} <= ${output.shape[dim] // 2} ?
                    ${output.shape[dim] // 2} :
                    ${-(output.shape[dim] // 2 + 1)})
                %endif
            %endif
            ;
        %endfor

        ${output.ctype} val = ${input.load_same};
        ${output.store_idx}(${', '.join(new_idx_names)}, val);
        """,
        connectors=['input'],
        render_kwds=dict(axes=axes))
def split_complex(input_arr_t):
    """
    Returns a transformation that splits complex input into two real outputs
    (2 outputs, 1 input): ``real = Re(input), imag = Im(input)``.

    Both outputs have the shape of ``input_arr_t`` and the real dtype
    corresponding to its dtype.
    """
    real_dtype = dtypes.real_for(input_arr_t.dtype)
    output_t = Type(real_dtype, shape=input_arr_t.shape)

    parameters = [
        Parameter('real', Annotation(output_t, 'o')),
        Parameter('imag', Annotation(output_t, 'o')),
        Parameter('input', Annotation(input_arr_t, 'i')),
    ]
    code = """
        ${real.store_same}(${input.load_same}.x);
        ${imag.store_same}(${input.load_same}.y);
        """
    return Transformation(parameters, code)
def copy(arr_t, out_arr_t=None):
    """
    Returns an identity transformation (1 output, 1 input): ``output = input``.

    ``out_arr_t`` (defaults to ``arr_t``) may differ from ``arr_t`` in strides,
    but a ``ValueError`` is raised if its shape or data type is different.
    """
    if out_arr_t is None:
        out_arr_t = arr_t
    elif out_arr_t.shape != arr_t.shape or out_arr_t.dtype != arr_t.dtype:
        raise ValueError("Input and output arrays must have the same shape and data type")

    parameters = [
        Parameter('output', Annotation(out_arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    return Transformation(parameters, "${output.store_same}(${input.load_same});")
def div_const(arr_t, param):
    """
    Returns a scaling transformation with a fixed parameter (1 output, 1 input):
    ``output = input / param``.

    The type of ``param`` is detected from its value, and the constant is
    rendered into the kernel code; the result keeps the dtype of ``arr_t``.
    """
    param_dtype = dtypes.detect_type(param)

    parameters = [
        Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    render_kwds = dict(
        div=functions.div(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype),
        param=dtypes.c_constant(param, dtype=param_dtype))

    return Transformation(
        parameters,
        "${output.store_same}(${div}(${input.load_same}, ${param}));",
        render_kwds=render_kwds)
def broadcast_const(arr_t, val):
    """
    Returns a transformation that broadcasts the given constant to the array output
    (1 output): ``output = val``.

    ``val`` is cast to the dtype of ``arr_t`` first; a ``ValueError`` is raised
    if the cast value is not a scalar.
    """
    val = dtypes.cast(arr_t.dtype)(val)
    if len(val.shape) != 0:
        raise ValueError("The constant must be a scalar")

    parameters = [Parameter('output', Annotation(arr_t, 'o'))]
    code = """
        const ${output.ctype} val = ${dtypes.c_constant(val)};
        ${output.store_same}(val);
        """
    return Transformation(parameters, code, render_kwds=dict(val=val))
def prepare_irfft_output(arr):
    """
    Returns a transformation (1 output, 1 input) that unpacks a complex array
    into a real array whose last axis is twice as long.

    For every complex element at position ``k`` of the last axis, the real part
    is stored at ``2 * k`` and the imaginary part at ``2 * k + 1``;
    the leading (batch) indices are passed through unchanged.

    :param arr: type of the complex input array.
    """
    res = Type(dtypes.real_for(arr.dtype), arr.shape[:-1] + (arr.shape[-1] * 2,))
    return Transformation(
        [
            Parameter('output', Annotation(res, 'o')),
            Parameter('input', Annotation(arr, 'i')),
        ],
        """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>
        ${input.ctype} x = ${input.load_same};
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2, x.x);
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1, x.y);
        """,
        # ``idxs`` run over the shape of ``input`` (it is the one read with ``load_same``,
        # while ``output`` is written with explicit indices),
        # so ``input`` is the only parameter that can serve as a connector.
        connectors=['input'])
def combine_complex(output_arr_t):
    """
    Returns a transformation that joins two real inputs into complex output
    (1 output, 2 inputs): ``output = real + 1j * imag``.

    Both inputs have the shape of ``output_arr_t`` and the real dtype
    corresponding to its dtype.
    """
    real_dtype = dtypes.real_for(output_arr_t.dtype)
    input_t = Type(real_dtype, shape=output_arr_t.shape)

    parameters = [
        Parameter('output', Annotation(output_arr_t, 'o')),
        Parameter('real', Annotation(input_t, 'i')),
        Parameter('imag', Annotation(input_t, 'i')),
    ]
    code = """
        ${output.store_same}(
            COMPLEX_CTR(${output.ctype})(
                ${real.load_same},
                ${imag.load_same}));
        """
    return Transformation(parameters, code)
def unimod_gen(size, single=True):
    """
    Returns a transformation (1 output, 1 input) that replaces every complex
    element with the unit-modulus number of the same phase:
    ``output = polar_unit(atan2(Im(input), Re(input)))``.

    :param size: shape passed to the input and output array types.
    :param single: if ``True``, ``complex64`` arrays are used, otherwise ``complex128``.
    """
    if single:
        dtype, real_dtype = np.complex64, np.float32
    else:
        dtype, real_dtype = np.complex128, np.double

    parameters = [
        Parameter('output', Annotation(Type(dtype, size), 'o')),
        Parameter('input', Annotation(Type(dtype, size), 'i')),
    ]
    code = '''
        ${input.ctype} val = ${input.load_same};
        ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        '''
    return Transformation(
        parameters, code,
        render_kwds=dict(polar_unit=functions.polar_unit(dtype=real_dtype)))
def prepare_rfft_input(arr):
    """
    Returns a transformation (1 output, 1 input) that packs a real array
    into a complex array whose last axis is half as long.

    The output element at position ``k`` of the last axis is built from the
    input elements ``2 * k`` (real part) and ``2 * k + 1`` (imaginary part);
    the leading (batch) indices are passed through unchanged.

    :param arr: type of the real input array.
    """
    packed_shape = arr.shape[:-1] + (arr.shape[-1] // 2,)
    res = Type(dtypes.complex_for(arr.dtype), packed_shape)

    parameters = [
        Parameter('output', Annotation(res, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]
    code = """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>
        ${input.ctype} re = ${input.load_idx}(${batch_idxs} ${idxs[-1]} * 2);
        ${input.ctype} im = ${input.load_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1);
        ${output.store_same}(COMPLEX_CTR(${output.ctype})(re, im));
        """
    return Transformation(parameters, code, connectors=['output'])
def get_prepare_for_mul_trf(shape):
    """
    Returns a transformation (1 output, 1 input) that wraps every element into
    the internal transformed type, passes it through the ``prepare_for_mul``
    module and stores the ``val`` field of the result.

    :param shape: shape of both the input and the output arrays
        (their dtype is ``transformed_dtype()``).
    """
    dtype = transformed_dtype()
    array_t = Type(dtype, shape)

    parameters = [
        Parameter('output', Annotation(array_t, 'o')),
        Parameter('input', Annotation(Type(dtype, shape), 'i')),
    ]
    code = """
        ${dtypes.ctype(dtype)} x = ${input.load_same};
        ${ff_ctype} x_ff = { x };
        ${output.store_same}(${prepare_for_mul}(x_ff).val);
        """
    # NOTE(review): ``ff_elem`` is not defined in this function,
    # presumably a module-level object - confirm.
    render_kwds = dict(
        prepare_for_mul=prepare_for_mul(ff_elem=ff_elem).module,
        dtype=dtype,
        ff_ctype=transformed_internal_ctype())

    return Transformation(
        parameters, code,
        connectors=['input', 'output'],
        render_kwds=render_kwds)
def _build_plan(self, plan_factory, device_params, output, alpha, beta):
    """
    Builds a plan consisting of a single sum reduction over the first axis,
    whose input is produced on the fly from ``alpha`` and ``beta``.

    For every ``(sample, order)`` pair the attached transformation multiplies
    one complex factor per mode: ``exp(-alpha * beta)`` for modes with the index
    ``>= order + 1``, and ``1 - exp(-alpha * beta)`` for the rest;
    the real part of the product is what gets summed.
    """
    plan = plan_factory()

    n_samples = alpha.shape[0]
    for_reduction = Type(numpy.float64, (n_samples, self._max_click_order))

    trf_parameters = [
        Parameter('output', Annotation(for_reduction, 'o')),
        Parameter('alpha', Annotation(alpha, 'i')),
        Parameter('beta', Annotation(beta, 'i')),
    ]
    trf_code = """
        VSIZE_T sample_idx = ${idxs[0]};
        VSIZE_T order = ${idxs[1]} + 1;

        ${alpha.ctype} result = COMPLEX_CTR(${alpha.ctype})(1, 0);
        for (VSIZE_T i = 0; i < ${modes}; i++)
        {
            ${alpha.ctype} alpha = ${alpha.load_idx}(sample_idx, i);
            ${beta.ctype} beta = ${beta.load_idx}(sample_idx, i);
            ${alpha.ctype} t = ${mul_cc}(alpha, beta);
            ${alpha.ctype} np = ${exp_c}(COMPLEX_CTR(${alpha.ctype})(-t.x, -t.y));

            if (i >= order)
            {
                result = ${mul_cc}(result, np);
            }
            else
            {
                ${alpha.ctype} cp = COMPLEX_CTR(${alpha.ctype})(1 - np.x, -np.y);
                result = ${mul_cc}(result, cp);
            }
        }
        ${output.store_same}(result.x);
        """
    meter_trf = Transformation(
        trf_parameters, trf_code,
        render_kwds=dict(
            mul_cc=functions.mul(alpha.dtype, alpha.dtype),
            exp_c=functions.exp(alpha.dtype),
            modes=self._system.modes,
            ))

    reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
    # After the connection the reduction takes ``alpha_p`` and ``beta_p``
    # in place of its original input.
    reduction.parameter.input.connect(
        meter_trf, meter_trf.output,
        alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

    plan.computation_call(reduction, output, alpha, beta)
    return plan
def crop_frequencies(arr):
    """
    Crop a 2D array whose columns represent frequencies to only leave the frequencies with
    different absolute values.

    The returned transformation (1 output, 1 input) copies the first
    ``arr.shape[1] // 2 + 1`` columns of the input and ignores the rest.
    """
    n_rows, n_columns = arr.shape[0], arr.shape[1]
    result_arr = Type(arr.dtype, (n_rows, n_columns // 2 + 1))

    parameters = [
        Parameter('output', Annotation(result_arr, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]
    code = """
        if (${idxs[1]} < ${input.shape[1] // 2 + 1})
            ${output.store_idx}(${idxs[0]}, ${idxs[1]}, ${input.load_same});
        """
    # note that only the "load_same"-using argument can serve as a connector!
    return Transformation(parameters, code, connectors=['input'])
def copy_broadcasted(arr_t, out_arr_t=None):
    """
    Returns an identity transformation (1 output, 1 input): ``output = input``,
    where ``input`` may be broadcasted (with the same semantics as ``numpy.broadcast_to()``).
    Output array type ``out_arr_t`` may have different strides,
    but must have the same data type and a shape the input can be broadcasted to.

    Raises ``ValueError`` if the data types differ or if the input
    is not broadcastable to the output.

    .. note::

        This is an input-only transformation.
    """
    if out_arr_t is None:
        out_arr_t = arr_t

    if out_arr_t.dtype != arr_t.dtype:
        raise ValueError("Input and output arrays must have the same data type")

    source_t = Type.from_value(arr_t)
    target_t = Type.from_value(out_arr_t)
    if not source_t.broadcastable_to(target_t):
        raise ValueError("Input is not broadcastable to output")

    parameters = [
        Parameter('output', Annotation(out_arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    # Input axes are aligned with the trailing output axes;
    # an input axis of length 1 is always read at index 0.
    code = """
        ${output.store_same}(${input.load_idx}(
        %for i in range(len(input.shape)):
            %if input.shape[i] == 1:
            0
            %else:
            ${idxs[i + len(output.shape) - len(input.shape)]}
            %endif
            %if i != len(input.shape) - 1:
                ,
            %endif
        %endfor
        ));
        """
    return Transformation(parameters, code, connectors=['output'])
def _build_plan(self, plan_factory, device_params, output, matrix, vector):
    """
    Builds a plan that multiplies every element of ``matrix`` by the element of
    ``vector`` selected by the last index, and sums the products over the last axis.

    The multiplication is attached to the input of the reduction as a
    transformation, so the whole plan is a single reduction call.
    """
    plan = plan_factory()

    last_axis = len(matrix.shape) - 1
    summation = Reduce(matrix, predicate_sum(matrix.dtype), axes=(last_axis,))

    trf_parameters = [
        Parameter('output', Annotation(matrix, 'o')),
        Parameter('matrix', Annotation(matrix, 'i')),
        Parameter('vector', Annotation(vector, 'i')),
    ]
    trf_code = """
        ${output.store_same}(${mul}(${matrix.load_same}, ${vector.load_idx}(${idxs[-1]})));
        """
    mul_vec = Transformation(
        trf_parameters, trf_code,
        render_kwds=dict(mul=functions.mul(matrix.dtype, vector.dtype)),
        connectors=['output', 'matrix'])

    summation.parameter.input.connect(
        mul_vec, mul_vec.output,
        matrix=mul_vec.matrix, vector=mul_vec.vector)

    plan.computation_call(summation, output, matrix, vector)
    return plan
def _build_plan(self, plan_factory, device_params, result, lwe_a, lwe_b, key):
    """
    Builds a plan that computes ``result = lwe_b - (output of MatrixMulVector)``.

    The subtraction is attached as a transformation to the output of a
    ``MatrixMulVector`` computation created from ``lwe_a``, so the plan consists
    of a single computation call taking ``lwe_a`` and ``key`` as its last arguments.
    """
    plan = plan_factory()

    mul_key = MatrixMulVector(lwe_a)

    trf_parameters = [
        Parameter('result', Annotation(result, 'o')),
        Parameter('b', Annotation(lwe_b, 'i')),
        Parameter('a_times_key', Annotation(lwe_b, 'i')),
    ]
    trf_code = """
        ${result.store_same}(${b.load_same} - ${a_times_key.load_same});
        """
    fill_res = Transformation(trf_parameters, trf_code, connectors=['a_times_key'])

    # The output of the multiplication is replaced by ``result`` and ``b``.
    mul_key.parameter.output.connect(
        fill_res, fill_res.a_times_key,
        result=fill_res.result, b=fill_res.b)

    plan.computation_call(mul_key, result, lwe_b, lwe_a, key)
    return plan
def get_prepare_iprfft_input(X):
    """
    Returns a transformation (1 output ``Y``, 1 input ``X``) that extends the last
    axis of ``X`` by one element.

    With ``n`` being the length of the last axis of ``X``:

    - ``Y[0] = (-2 * Im(X[0]), 0)``;
    - ``Y[n] = (2 * Im(X[n - 1]), 0)``;
    - ``Y[k] = (-Im(d), Re(d))`` with ``d = X[k] - X[k - 1]`` for the rest.
    """
    # Input: size N//4
    # Output: size N//4+1
    N = X.shape[-1] * 4
    extended_shape = X.shape[:-1] + (N // 4 + 1,)
    Y = Type(X.dtype, extended_shape)

    parameters = [
        Parameter('Y', Annotation(Y, 'o')),
        Parameter('X', Annotation(X, 'i')),
    ]
    code = """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>

        ${Y.ctype} Y;
        if (${idxs[-1]} == 0)
        {
            ${X.ctype} X = ${X.load_idx}(${batch_idxs} 0);
            Y = COMPLEX_CTR(${Y.ctype})(-2 * X.y, 0);
        }
        else if (${idxs[-1]} == ${N//4})
        {
            ${X.ctype} X = ${X.load_idx}(${batch_idxs} ${N//4-1});
            Y = COMPLEX_CTR(${Y.ctype})(2 * X.y, 0);
        }
        else
        {
            ${X.ctype} X = ${X.load_idx}(${batch_idxs} ${idxs[-1]});
            ${X.ctype} X_prev = ${X.load_idx}(${batch_idxs} ${idxs[-1]} - 1);
            ${X.ctype} diff = X - X_prev;
            Y = COMPLEX_CTR(${Y.ctype})(-diff.y, diff.x);
        }

        ${Y.store_same}(Y);
        """
    return Transformation(parameters, code, connectors=['Y'], render_kwds=dict(N=N))
def norm_param(arr_t):
    """
    Returns a transformation that calculates the ``order``-norm
    (1 output, 1 input, 1 param): ``output = abs(input) ** order``.

    The output (and the scalar ``order`` parameter) has the real dtype
    corresponding to the dtype of ``arr_t`` if it is complex,
    and the dtype of ``arr_t`` itself otherwise.
    """
    in_dtype = arr_t.dtype
    out_dtype = dtypes.real_for(in_dtype) if dtypes.is_complex(in_dtype) else in_dtype

    parameters = [
        Parameter('output', Annotation(Type(out_dtype, arr_t.shape), 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
        Parameter('order', Annotation(Type(out_dtype))),
    ]
    # The value returned by ``norm`` is raised to the power ``order / 2``.
    code = """
        ${input.ctype} val = ${input.load_same};
        ${output.ctype} norm = ${norm}(val);
        norm = pow(norm, ${order} / 2);
        ${output.store_same}(norm);
        """
    return Transformation(
        parameters, code,
        render_kwds=dict(norm=functions.norm(arr_t.dtype)))
def _build_plan(
        self, plan_factory, device_params,
        result_a, result_b, result_cv, messages, key, noises_a, noises_b):
    """
    Builds a plan of two computation calls:

    1. a ``MatrixMulVector`` computation (created from ``noises_a``) whose output
       is replaced by a transformation storing
       ``result_b = noises_b + messages + (output of the multiplication)``
       and filling ``result_cv`` with the constant ``self._noise ** 2``;
    2. a copy of ``noises_a`` into ``result_a``.
    """
    plan = plan_factory()

    mul_key = MatrixMulVector(noises_a)

    trf_parameters = [
        Parameter('result_b', Annotation(result_b, 'o')),
        Parameter('result_cv', Annotation(result_cv, 'o')),
        Parameter('messages', Annotation(messages, 'i')),
        Parameter('noises_a_times_key', Annotation(noises_b, 'i')),
        Parameter('noises_b', Annotation(noises_b, 'i')),
    ]
    trf_code = """
        ${result_b.store_same}(
            ${noises_b.load_same}
            + ${messages.load_same}
            + ${noises_a_times_key.load_same});
        ${result_cv.store_same}(${noise**2});
        """
    fill_b_cv = Transformation(
        trf_parameters, trf_code,
        connectors=['noises_a_times_key'],
        render_kwds=dict(noise=self._noise))

    mul_key.parameter.output.connect(
        fill_b_cv, fill_b_cv.noises_a_times_key,
        b=fill_b_cv.result_b,
        cv=fill_b_cv.result_cv,
        messages=fill_b_cv.messages,
        noises_b=fill_b_cv.noises_b)

    plan.computation_call(
        mul_key, result_b, result_cv, messages, noises_b, noises_a, key)

    copy_noises = PureParallel.from_trf(transformations.copy(noises_a))
    plan.computation_call(copy_noises, result_a, noises_a)

    return plan
def _build_plan(self, plan_factory, device_params, output, alpha, beta):
    """
    Builds a plan consisting of a single sum reduction over the first axis,
    whose input is produced on the fly from ``alpha`` and ``beta``.

    For every ``(sample, order)`` pair the attached transformation multiplies
    the values ``alpha * beta - ordering`` (``ordering`` is subtracted from the
    real part) for modes ``0 .. order`` inclusive; the real part of the
    product is what gets summed.
    """
    plan = plan_factory()

    n_samples = alpha.shape[0]
    for_reduction = Type(numpy.float64, (n_samples, self._max_moment))

    trf_parameters = [
        Parameter('output', Annotation(for_reduction, 'o')),
        Parameter('alpha', Annotation(alpha, 'i')),
        Parameter('beta', Annotation(beta, 'i')),
    ]
    trf_code = """
        VSIZE_T sample_idx = ${idxs[0]};
        VSIZE_T order = ${idxs[1]};

        ${alpha.ctype} result = COMPLEX_CTR(${alpha.ctype})(1, 0);
        for (VSIZE_T i = 0; i <= order; i++)
        {
            ${alpha.ctype} alpha = ${alpha.load_idx}(sample_idx, i);
            ${beta.ctype} beta = ${beta.load_idx}(sample_idx, i);
            ${alpha.ctype} t = ${mul_cc}(alpha, beta);
            t.x -= ${ordering};
            result = ${mul_cc}(result, t);
        }
        ${output.store_same}(result.x);
        """
    meter_trf = Transformation(
        trf_parameters, trf_code,
        render_kwds=dict(
            mul_cc=functions.mul(alpha.dtype, alpha.dtype),
            ordering=ordering(self._representation),
            ))

    reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
    # After the connection the reduction takes ``alpha_p`` and ``beta_p``
    # in place of its original input.
    reduction.parameter.input.connect(
        meter_trf, meter_trf.output,
        alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

    plan.computation_call(reduction, output, alpha, beta)
    return plan
def _build_plan(self, plan_factory, device_params, result, phase):
    """
    Builds a plan that maps every element of ``phase`` to
    ``((unsigned int)phase + interv // 2) / interv``,
    where ``interv = 2**32 // self._mspace_size``.

    The mapping is an elementwise transformation wrapped in a ``PureParallel``
    computation guided by the ``result`` array.
    """
    plan = plan_factory()

    trf_parameters = [
        Parameter('result', Annotation(result, 'o')),
        Parameter('phase', Annotation(phase, 'i')),
    ]
    trf_code = """
        <%
            interv = 2**32 // mspace_size
            half_interv = interv // 2
        %>
        ${phase.ctype} phase = ${phase.load_same};
        ${result.store_same}(((unsigned int)phase + ${half_interv}) / ${interv});
        """
    # ``uint64`` is passed to the template, although the code above does not refer to it.
    trf_render_kwds = dict(
        mspace_size=self._mspace_size,
        uint64=dtypes.ctype(numpy.uint64))

    tr = Transformation(
        trf_parameters, trf_code,
        render_kwds=trf_render_kwds,
        connectors=['result', 'phase'])

    computation = PureParallel.from_trf(tr, guiding_array='result')
    plan.computation_call(computation, result, phase)

    return plan
def get_tgsw_polynomial_decomp_trf(params: 'TGswParams', shape):
    """
    Returns a transformation (1 output ``result``, 1 input ``sample``)
    that decomposes every element of ``sample`` into ``decomp_length`` digits.

    ``sample`` has the shape ``shape + (mask_size + 1, polynomial_degree)``,
    ``result`` has the shape ``shape + (mask_size + 1, decomp_length, polynomial_degree)``.
    The digit number ``j`` (the index along the ``decomp_length`` axis) is

        ``(((sample + offset) >> (32 - (j + 1) * bs_log2_base)) & mask) - half_base``

    with ``mask = 2**bs_log2_base - 1`` and ``half_base = 2**(bs_log2_base - 1)``.

    :param params: supplies ``tlwe_params``, ``decomp_length``, ``bs_log2_base`` and ``offset``.
    :param shape: batch shape (a tuple) prepended to the shapes of both arrays.
    """
    tlwe_params = params.tlwe_params
    decomp_length = params.decomp_length
    mask_size = tlwe_params.mask_size
    polynomial_degree = tlwe_params.polynomial_degree

    result = Type(Int32, shape + (mask_size + 1, decomp_length, polynomial_degree))
    sample = Type(Torus32, shape + (mask_size + 1, polynomial_degree))

    return Transformation(
        [
            Parameter('result', Annotation(result, 'o')),
            Parameter('sample', Annotation(sample, 'i')),
        ],
        """
        <%
            mask = 2**params.bs_log2_base - 1
            half_base = 2**(params.bs_log2_base - 1)
        %>
        ${sample.ctype} sample = ${sample.load_idx}(${", ".join(idxs[:-2])}, ${idxs[-1]});
        int decomp_shift = 32 - (${idxs[-2]} + 1) * ${params.bs_log2_base};
        ${result.store_same}(
            (((sample + (${params.offset})) >> decomp_shift) & ${mask}) - ${half_base}
        );
        """,
        # The connector must name an existing parameter: ``result`` is the one
        # written with ``store_same`` (the previous value 'results' matched nothing).
        connectors=['result'],
        render_kwds=dict(params=params))
def test_io_parameter_in_transformation():
    """
    Checks that an attempt to create a transformation with
    an 'io'-annotated parameter raises ``ValueError``.
    """
    identity_code = "${o1.store_same}(${o1.load_same});"

    with pytest.raises(ValueError):
        Transformation(
            [Parameter('o1', Annotation(Type(numpy.float32, shape=100), 'io'))],
            identity_code)
def tr_identity(arr):
    """
    Returns an identity transformation (output ``o1``, input ``i1``),
    with both parameters having the type of ``arr``: ``o1 = i1``.
    """
    parameters = [
        Parameter('o1', Annotation(arr, 'o')),
        Parameter('i1', Annotation(arr, 'i')),
    ]
    return Transformation(parameters, "${o1.store_same}(${i1.load_same});")
result.cur_min = ${v2}.cur_min; if (${v2}.cur_max > result.cur_max) result.cur_max = ${v2}.cur_max; return result; """, render_kwds=dict(ctype=mmc_c_decl)), empty) # Test array arr = numpy.random.randint(0, 10**6, 20000) # A transformation that creates initial minmax structures for the given array of integers to_mmc = Transformation([ Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')), Parameter('input', Annotation(arr, 'i')) ], """ ${output.ctype} res; res.cur_min = ${input.load_same}; res.cur_max = ${input.load_same}; ${output.store_same}(res); """) # Create the reduction computation and attach the transformation above to its input. reduction = Reduce(to_mmc.output, predicate) reduction.parameter.input.connect(to_mmc, to_mmc.output, new_input=to_mmc.input) creduction = reduction.compile(thr) # Run the computation arr_dev = thr.to_device(arr) res_dev = thr.empty_like(reduction.parameter.output)
def _build_plan(self, plan_factory, device_params, output, alpha, beta):
    """
    Builds the plan in four stages:

    1. the ``compound_click_probability_prepare`` kernel fills a temporary array
       shaped like ``alpha`` from ``alpha`` and ``beta``;
    2. the ``compound_click_probability_aggregate`` kernel produces the
       ``products`` array of the shape ``(samples, max_total_clicks + 1)``;
    3. ``products`` is summed over the first (samples) axis;
    4. an ``FFT`` computation is called on the sums (with ``True`` as its last argument),
       and the real part of its result is stored in ``output``.

    The aggregation kernel may not fit into the device resources with the
    initially chosen work group size; in this case the size is halved
    and the call is retried.
    """
    plan = plan_factory()

    samples, modes = alpha.shape

    for_reduction = Type(alpha.dtype, (samples, self._max_total_clicks + 1))

    prepared_state = plan.temp_array_like(alpha)

    plan.kernel_call(
        TEMPLATE.get_def("compound_click_probability_prepare"),
        [prepared_state, alpha, beta],
        kernel_name="compound_click_probability_prepare",
        global_size=alpha.shape,
        render_kwds=dict(
            mul_cc=functions.mul(alpha.dtype, alpha.dtype),
            exp_c=functions.exp(alpha.dtype),
            ))

    # Block size is limited by the amount of available local memory.
    # In some OpenCL implementations the number reported cannot actually be fully used
    # (because it's used by kernel arguments), so we're padding it a little.
    local_mem_size = device_params.local_mem_size
    max_elems = (local_mem_size - 256) // alpha.dtype.itemsize
    block_size = 2**helpers.log2(max_elems)

    # No reason to have block size larger than the number of modes
    block_size = min(block_size, helpers.bounding_power_of_2(modes))

    products_gsize = (
        samples,
        helpers.min_blocks(self._max_total_clicks + 1, block_size) * block_size)
    products = plan.temp_array_like(for_reduction)

    # These do not depend on ``read_size``, so they are calculated once.
    full_steps = modes // block_size
    remainder_size = modes % block_size

    read_size = min(block_size, device_params.max_work_group_size)

    # Keep halving the work group size until the kernel fits into the device resources.
    # The ``else`` branch guarantees that the loop stops right after the first
    # successful call, and that a failed call is followed by another attempt.
    # NOTE(review): if ``read_size`` starts at 1 or drops to 1, no aggregation kernel
    # is added to the plan at all - confirm that this cannot happen in practice.
    while read_size > 1:
        try:
            plan.kernel_call(
                TEMPLATE.get_def("compound_click_probability_aggregate"),
                [products, prepared_state],
                kernel_name="compound_click_probability_aggregate",
                global_size=products_gsize,
                local_size=(1, read_size,),
                render_kwds=dict(
                    block_size=block_size,
                    read_size=read_size,
                    full_steps=full_steps,
                    remainder_size=remainder_size,
                    output_size=self._max_total_clicks + 1,
                    mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                    add_cc=functions.add(alpha.dtype, alpha.dtype),
                    polar_unit=functions.polar_unit(dtypes.real_for(alpha.dtype)),
                    modes=self._system.modes,
                    max_total_clicks=self._max_total_clicks,
                    ))
        except OutOfResourcesError:
            read_size //= 2
        else:
            break

    reduction = Reduce(for_reduction, predicate_sum(alpha.dtype), axes=(0,))

    temp = plan.temp_array_like(reduction.parameter.output)

    plan.computation_call(reduction, temp, products)

    fft = FFT(temp)
    # Attached to the FFT output: keeps only the real part of every element.
    real_trf = Transformation(
        [
            Parameter('output', Annotation(output, 'o')),
            Parameter('input', Annotation(temp, 'i')),
        ],
        """
        ${input.ctype} val = ${input.load_same};
        ${output.store_same}(val.x);
        """)
    fft.parameter.output.connect(real_trf, real_trf.input, output_p=real_trf.output)

    plan.computation_call(fft, output, temp, True)

    return plan
def get_procs(thr, N):
    """
    Creates and compiles four computations working on 1D arrays of ``N`` elements.

    :param thr: the object the computations are compiled for
        (also passed to ``FFTFactory.create``).
    :param N: the length of the processed arrays.
    :returns: a tuple ``(fft_unimod, mag_square, apply_mask, combine_mag_phi)``:

        - ``fft_unimod``: the FFT from ``FFTFactory`` whose output is replaced by a
          unit-modulus number with the same phase for the indices ``<= N / 2``,
          and by zero for the rest;
        - ``mag_square``: ``output = |input|**2`` (stored as a complex number);
        - ``apply_mask``: ``output = origin * mask`` with a real-valued ``mask``;
        - ``combine_mag_phi``: a number with the modulus ``sqrt(Re(mag_square))``
          (0 for negative values) and the phase of ``phase``.
    """

    def complex_vector():
        return Type(np.complex128, N)

    fft = FFTFactory.create(thr, (N,), compile_=False)

    unimod_code = """
        VSIZE_T idx = ${idxs[0]};
        ${input.ctype} val = ${input.load_same};
        if (idx>${N}/2){
            val.x = 0.0;
            val.y = 0.0;
            ${output.store_same}(val);
        }else
            ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        """
    unimod_trans = Transformation(
        [
            Parameter('output', Annotation(complex_vector(), 'o')),
            Parameter('input', Annotation(complex_vector(), 'i')),
        ],
        unimod_code,
        render_kwds=dict(polar_unit=functions.polar_unit(dtype=np.float64), N=N))
    fft.parameter.output.connect(unimod_trans, unimod_trans.input, uni=unimod_trans.output)
    fft_unimod = fft.compile(thr)

    mag_square_code = '''
        VSIZE_T idx = ${idxs[0]};
        ${input.ctype} val = ${input.load_idx}(idx);
        val.x = val.x*val.x + val.y*val.y;
        val.y = 0;
        ${output.store_idx}(idx, val);
        '''
    mag_square = PureParallel(
        [
            Parameter('output', Annotation(complex_vector(), 'o')),
            Parameter('input', Annotation(complex_vector(), 'i')),
        ],
        mag_square_code).compile(thr)

    apply_mask_code = '''
        VSIZE_T idx = ${idxs[0]};
        ${output.store_idx}(idx, ${mul}(${origin.load_idx}(idx), ${mask.load_idx}(idx)));
        '''
    apply_mask = PureParallel(
        [
            Parameter('output', Annotation(complex_vector(), 'o')),
            Parameter('origin', Annotation(complex_vector(), 'i')),
            Parameter('mask', Annotation(Type(np.double, N), 'i')),
        ],
        apply_mask_code,
        render_kwds=dict(mul=functions.mul(np.complex128, np.double))).compile(thr)

    combine_code = '''
        VSIZE_T idx = ${idxs[0]};
        double r = ${mag_square.load_idx}(idx).x;
        r = r<0.0 ? 0.0 : ${pow}(r, 0.5);
        double2 v = ${phase.load_idx}(idx);
        double angle = atan2(v.y, v.x);
        ${output.store_idx}(idx, ${polar}(r, angle));
        '''
    combine_mag_phi = PureParallel(
        [
            Parameter('output', Annotation(complex_vector(), 'o')),
            Parameter('mag_square', Annotation(complex_vector(), 'i')),
            Parameter('phase', Annotation(complex_vector(), 'i')),
        ],
        combine_code,
        render_kwds=dict(
            pow=functions.pow(np.double),
            polar=functions.polar(np.double))).compile(thr)

    return fft_unimod, mag_square, apply_mask, combine_mag_phi
def closing(img, numiter=1):
    """Applies ``dilate`` and then ``erode`` to ``img``, passing ``numiter`` to both."""
    return erode(dilate(img, numiter), numiter)


def border(img):
    """Runs the ``prg.border`` kernel on ``img`` via ``repeat_kernel`` (with the argument 1)."""
    return repeat_kernel(img, prg.border, 1)


# Create LUT and stringify into preamble of map kernel.
# ``LUT[v]`` is the number of set bits in the byte value ``v``:
# for every bit position, 1 is added to the entries that have this bit set.
LUT = np.zeros(256, np.int32)
for b in range(8):  # ``range`` works both in Python 2 and 3, unlike ``xrange``
    LUT[(np.arange(256) & (1 << b)) != 0] += 1
strLUT = "constant int LUT[256] = {" + ",".join(map(str, LUT)) + "};\n"

# Maps every input byte to the number of set bits in it.
byte_to_count = Transformation(
    [
        Parameter('output', Annotation(Type(np.int32, (1,)), 'o')),
        Parameter('input', Annotation(Type(np.uint8, (1,)), 'i')),
    ],
    strLUT + """
    ${output.store_same}(LUT[${input.load_same}]);
    """)

# The snippet is rendered as the body of a C function,
# so the ``return`` statement must be terminated with a semicolon.
predicate = Predicate(
    Snippet.create(lambda v1, v2: """return ${v1} + ${v2};"""),
    np.int32(0))

# Sum reduction with the bit counting attached to its input.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(
    byte_to_count, byte_to_count.output, new_input=byte_to_count.input)

sum_bits = sum_bits_reduction.compile(thr)

#sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#    reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#    arguments="__global unsigned char *bytes",
#    preamble=strLUT)
def _build_plan(
        self, plan_factory, device_params,
        result_a, result_cv, key, noises1, noises2):
    """
    Builds a plan of four calls:

    1. forward transform of ``key``;
    2. forward transform of ``noises1``;
    3. inverse transform whose input is replaced by the elementwise product
       (``transformed_mul``) of the two transformed arrays, with the key
       addressed by the last two indices only;
    4. the ``tlwe_encrypt_zero_fill_result`` kernel that fills ``result_a`` and
       ``result_cv`` from ``noises1``, ``noises2`` and the result of step 3.
    """
    plan = plan_factory()

    polynomial_degree = self._polynomial_degree
    perf_params = self._perf_params
    transform = get_transform(self._transform_type)

    batch_shape = result_a.shape[:-2]
    batch_len = helpers.product(batch_shape)
    batch_dims = len(batch_shape)

    ft_key = transform.ForwardTransform(key.shape[:-1], polynomial_degree, perf_params)
    key_tr = plan.temp_array_like(ft_key.parameter.output)

    ft_noises = transform.ForwardTransform(
        noises1.shape[:-1], polynomial_degree, perf_params)
    noises1_tr = plan.temp_array_like(ft_noises.parameter.output)

    ift = transform.InverseTransform(noises1.shape[:-1], polynomial_degree, perf_params)
    ift_res = plan.temp_array_like(ift.parameter.output)

    mul_parameters = [
        Parameter('output', Annotation(ift.parameter.input, 'o')),
        Parameter('key', Annotation(key_tr, 'i')),
        Parameter('noises1', Annotation(noises1_tr, 'i')),
    ]
    mul_code = """
        ${output.store_same}(${tr_ctype}unpack(${mul}(
            ${tr_ctype}pack(${key.load_idx}(${idxs[-2]}, ${idxs[-1]})),
            ${tr_ctype}pack(${noises1.load_same})
            )));
        """
    mul_tr = Transformation(
        mul_parameters, mul_code,
        connectors=['output', 'noises1'],
        render_kwds=dict(
            mul=transform.transformed_mul(perf_params),
            tr_ctype=transform.transformed_internal_ctype()))

    # After the connection the inverse transform takes
    # ``key`` and ``noises1`` in place of its original input.
    ift.parameter.input.connect(
        mul_tr, mul_tr.output, key=mul_tr.key, noises1=mul_tr.noises1)

    plan.computation_call(ft_key, key_tr, key)
    plan.computation_call(ft_noises, noises1_tr, noises1)
    plan.computation_call(ift, ift_res, key_tr, noises1_tr)

    fill_render_kwds = dict(
        noise=self._noise,
        mask_size=self._mask_size,
        noises1_slices=(batch_dims, 1, 1),
        noises2_slices=(batch_dims, 1),
        cv_slices=(batch_dims,))
    plan.kernel_call(
        TEMPLATE.get_def("tlwe_encrypt_zero_fill_result"),
        [result_a, result_cv, noises1, noises2, ift_res],
        global_size=(batch_len, self._mask_size + 1, polynomial_degree),
        render_kwds=fill_render_kwds)

    return plan