def _gen_llvm_function_body(self, ctx, builder, params, state, arg_in, arg_out, *, tags: frozenset):
    """Emit a call from compiled code back into the Python ``custom_function``.

    A ctypes callback is created around ``self.custom_function`` and its
    address is embedded in the generated IR as an integer constant that is
    cast to a function pointer and called with the four structure pointers.

    Args:
        ctx: Not used by this implementation.
        builder: IR builder that receives the ``inttoptr`` and ``call``
            instructions.
        params: Pointer value to the parameter structure; fields whose ids
            are in ``self.cust_fct_params`` are forwarded as keyword
            arguments.
        state: Pointer value to the state structure; passed through to the
            callback, which does not read it.
        arg_in: Pointer value to the input; converted to nested lists and
            then to a float array for ``custom_function``.
        arg_out: Pointer value to the output; filled element-wise from
            ``np.atleast_2d`` of the ``custom_function`` result.
        tags: Not used by this implementation.

    Returns:
        The ``builder`` that was passed in.

    Raises:
        AssertionError: If any id in ``self.llvm_state_ids`` is also in
            ``self.cust_fct_params``.
    """
    # Instantiate needed ctypes
    arg_in_ct = pnlvm._convert_llvm_ir_to_ctype(arg_in.type.pointee)
    params_ct = pnlvm._convert_llvm_ir_to_ctype(params.type.pointee)
    state_ct = pnlvm._convert_llvm_ir_to_ctype(state.type.pointee)
    arg_out_ct = pnlvm._convert_llvm_ir_to_ctype(arg_out.type.pointee)
    wrapper_ct = ctypes.CFUNCTYPE(None,
                                  ctypes.POINTER(params_ct),
                                  ctypes.POINTER(state_ct),
                                  ctypes.POINTER(arg_in_ct),
                                  ctypes.POINTER(arg_out_ct))

    # we don't support passing any stateful params
    for p in self.llvm_state_ids:
        assert p not in self.cust_fct_params

    def _carr_to_list(carr):
        """Recursively convert a (nested) ctypes array to nested lists."""
        try:
            return [_carr_to_list(x) for x in carr]
        except TypeError:
            # Not iterable: this is a leaf value
            return carr

    def _assign_to_carr(carr, vals):
        """Recursively copy ``vals`` into the (nested) ctypes array ``carr``."""
        assert len(carr) == len(vals)
        for i, x in enumerate(vals):
            try:
                carr[i] = x
            except TypeError:
                # Direct assignment was rejected; descend one level
                _assign_to_carr(carr[i], x)

    def _wrapper(params, state, arg_in, arg_out):
        """ctypes callback: unpack arguments, run ``custom_function``, store the result."""
        variable = _carr_to_list(arg_in.contents)

        llvm_params = {}
        for i, p in enumerate(self.llvm_param_ids):
            if p in self.cust_fct_params:
                # The i-th struct field is read for the i-th param id
                field_name = params.contents._fields_[i][0]
                val = getattr(params.contents, field_name)
                llvm_params[p] = val

        if self.context_arg:
            # FIXME: We can't get the context
            #        and do not support runtime params
            llvm_params[CONTEXT] = None
            llvm_params[PARAMS] = None

        # np.asfarray was removed in NumPy 2.0; asarray with a float dtype
        # is the documented replacement and yields the same float64 array.
        value = self.custom_function(np.asarray(variable, dtype=float), **llvm_params)
        _assign_to_carr(arg_out.contents, np.atleast_2d(value))

    # Keep a reference on the instance: a ctypes callback object must stay
    # alive for as long as native code may call it.
    self.__wrapper_f = wrapper_ct(_wrapper)
    # To get the right callback pointer, we need to cast to void*
    wrapper_address = ctypes.cast(self.__wrapper_f, ctypes.c_void_p)
    # Direct pointer constants don't work
    wrapper_ptr = builder.inttoptr(pnlvm.ir.IntType(64)(wrapper_address.value),
                                   builder.function.type)
    builder.call(wrapper_ptr, [params, state, arg_in, arg_out])

    return builder
def test_helper_fclamp_const(mode):
    """Compare ``pnlvm.helpers.fclamp`` with constant bounds against ``np.clip``.

    Builds a function taking a float pointer and an int32 count whose loop
    body loads an element, clamps it to ``[TST_MIN, TST_MAX]`` and stores it
    back through the same pointer. The function is run on a copy of
    ``VECTOR`` and the result must equal ``np.clip(VECTOR, TST_MIN, TST_MAX)``.

    Args:
        mode: ``'CPU'`` calls the binary function directly with a ctypes
            pointer; any other value goes through ``cuda_wrap_call``.
    """
    with pnlvm.LLVMBuilderContext() as ctx:
        data = copy.deepcopy(VECTOR)
        elem_ptr_ty = ctx.float_ty.as_pointer()
        signature = ir.FunctionType(ir.VoidType(), (elem_ptr_ty, ctx.int32_ty))

        # Create clamp function
        kernel_name = ctx.get_unique_name("clamp")
        kernel = ir.Function(ctx.module, signature, name=kernel_name)
        vec, count = kernel.args
        entry = kernel.append_basic_block(name="entry")
        builder = ir.IRBuilder(entry)

        with pnlvm.helpers.for_loop_zero_inc(builder, count, "linear") as (loop_builder, idx):
            # Clamp in place: load, clamp, write back to the same slot
            slot = loop_builder.gep(vec, [idx])
            loaded = loop_builder.load(slot)
            clamped = pnlvm.helpers.fclamp(loop_builder, loaded, TST_MIN, TST_MAX)
            loop_builder.store(clamped, slot)

        builder.ret_void()

    expected = np.clip(VECTOR, TST_MIN, TST_MAX)
    compiled = pnlvm.LLVMBinaryFunction.get(kernel_name)
    if mode != 'CPU':
        compiled.cuda_wrap_call(data, np.int32(DIM_X))
    else:
        c_ptr_ty = pnlvm._convert_llvm_ir_to_ctype(elem_ptr_ty)
        data_ptr = data.ctypes.data_as(c_ptr_ty)
        compiled(data_ptr, DIM_X)

    assert np.array_equal(data, expected)
def test_helper_is_close(mode):
    """Compare ``pnlvm.helpers.is_close`` element-wise against ``np.isclose``.

    Builds a function taking two input float pointers, one output float
    pointer and an int32 count. The loop body loads one element from each
    input, evaluates ``is_close`` on the pair and stores 1.0 or 0.0 to the
    output. The second input matches ``VECTOR`` at every even index and
    holds random values elsewhere.

    Args:
        mode: ``'CPU'`` calls the binary function directly with ctypes
            pointers; any other value goes through ``cuda_wrap_call``.
    """
    with pnlvm.LLVMBuilderContext() as ctx:
        double_ptr_ty = ctx.float_ty.as_pointer()
        func_ty = ir.FunctionType(ir.VoidType(), [double_ptr_ty, double_ptr_ty,
                                                  double_ptr_ty, ctx.int32_ty])

        # Create is_close function
        custom_name = ctx.get_unique_name("all_close")
        function = ir.Function(ctx.module, func_ty, name=custom_name)
        in1, in2, out, count = function.args
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        with pnlvm.helpers.for_loop_zero_inc(builder, count, "compare") as (b1, index):
            val1_ptr = b1.gep(in1, [index])
            val2_ptr = b1.gep(in2, [index])
            val1 = b1.load(val1_ptr)
            val2 = b1.load(val2_ptr)
            close = pnlvm.helpers.is_close(b1, val1, val2)
            out_ptr = b1.gep(out, [index])
            # Encode the boolean as a float so the output shares the input type
            out_val = b1.select(close, ctx.float_ty(1), ctx.float_ty(0))
            b1.store(out_val, out_ptr)

        builder.ret_void()

    vec1 = copy.deepcopy(VECTOR)
    tmp = np.random.rand(DIM_X)
    # Even indices copy vec1; odd indices keep their random values
    tmp[0::2] = vec1[0::2]
    # np.asfarray was removed in NumPy 2.0; asarray with a float dtype is
    # the documented replacement.
    vec2 = np.asarray(tmp, dtype=float)
    assert len(vec1) == len(vec2)
    res = np.empty_like(vec2)

    ref = np.isclose(vec1, vec2)
    bin_f = pnlvm.LLVMBinaryFunction.get(custom_name)
    if mode == 'CPU':
        ct_ty = pnlvm._convert_llvm_ir_to_ctype(double_ptr_ty)
        ct_vec1 = vec1.ctypes.data_as(ct_ty)
        ct_vec2 = vec2.ctypes.data_as(ct_ty)
        ct_res = res.ctypes.data_as(ct_ty)

        bin_f(ct_vec1, ct_vec2, ct_res, DIM_X)
    else:
        bin_f.cuda_wrap_call(vec1, vec2, res, np.int32(DIM_X))

    assert np.array_equal(res, ref)
def test_helper_all_close(mode):
    """Compare ``pnlvm.helpers.all_close`` on two arrays against ``np.allclose``.

    Builds a function taking two pointers to ``DIM_X``-element float arrays
    and a pointer to an int32. It stores 1 through the int32 pointer when
    ``all_close`` holds for the two arrays and 0 otherwise. Both inputs are
    independent copies of ``VECTOR``.

    Args:
        mode: ``'CPU'`` calls the binary function directly with ctypes
            arguments; any other value goes through ``cuda_wrap_call``.
    """
    with pnlvm.LLVMBuilderContext() as ctx:
        vec_ptr_ty = ir.ArrayType(ctx.float_ty, DIM_X).as_pointer()
        flag_ptr_ty = ir.IntType(32).as_pointer()
        signature = ir.FunctionType(ir.VoidType(),
                                    [vec_ptr_ty, vec_ptr_ty, flag_ptr_ty])

        kernel_name = ctx.get_unique_name("all_close")
        kernel = ir.Function(ctx.module, signature, name=kernel_name)
        lhs, rhs, flag_out = kernel.args
        entry = kernel.append_basic_block(name="entry")
        builder = ir.IRBuilder(entry)

        # Element type behind the output pointer, used to build the 0/1 constants
        flag_ty = flag_out.type.pointee
        verdict = pnlvm.helpers.all_close(builder, lhs, rhs)
        flag_val = builder.select(verdict, flag_ty(1), flag_ty(0))
        builder.store(flag_val, flag_out)
        builder.ret_void()

    first = copy.deepcopy(VECTOR)
    second = copy.deepcopy(VECTOR)

    expected = np.allclose(first, second)
    compiled = pnlvm.LLVMBinaryFunction.get(kernel_name)
    if mode == 'CPU':
        c_vec_ty = pnlvm._convert_llvm_ir_to_ctype(vec_ptr_ty)
        outcome = ctypes.c_int32()
        compiled(first.ctypes.data_as(c_vec_ty),
                 second.ctypes.data_as(c_vec_ty),
                 ctypes.byref(outcome))
    else:
        # Starts at 5, a value the function never stores (presumably a sentinel)
        outcome_buf = np.array([5], dtype=np.int32)
        compiled.cuda_wrap_call(first, second, outcome_buf)
        outcome = outcome_buf[0]

    assert np.array_equal(outcome, expected)