def finalize(self, transform_result, program_config):
    """Finalize the ``hello`` entry point from the first transformed tree.

    ``program_config`` is accepted but not read.  The entry point signature
    is ``CFUNCTYPE(None)``: no parameters and ``None`` as the result type.

    Returns whatever ``GreeterFunction.finalize`` returns.
    """
    hello_tree = transform_result[0]
    signature = CFUNCTYPE(None)
    greeter = GreeterFunction()
    project = Project([hello_tree])
    return greeter.finalize(project, "hello", signature)
def finalize(self, files, program_cfg):
    """Build the ctypes signature for the last declaration of ``files[0]``.

    Each parameter of ``files[0].body[-1]`` is resolved to an array
    configuration in one of two ways:

    * if its name contains ``"self_"``, the attribute named by
      ``param.name[5:]`` is read from ``arg_cfg[0]`` and wrapped in
      ``arr_cfg(shape, dtype)``;
    * otherwise the argument of the original Python function with the same
      name is located and the entry of ``arg_cfg`` at that position is used.

    Returns:
        A ``ConcreteMeta`` built from the file name, the project, the entry
        type, the C parameters and the original Python arguments.

    Raises:
        NotImplementedError: if a parameter does not resolve to an
            ``arr_cfg`` instance (including when no Python argument with a
            matching name exists).
    """
    arg_cfg, tune_cfg = program_cfg
    proj = Project(files)
    c_params = files[0].body[-1].params
    py_args = self.original_tree.body[0].args.args
    # Python 3 AST argument nodes expose ``.arg``; Python 2 ones ``.id``.
    use_arg_attr = sys.version_info > (3, 0)

    entry_type = (None, )
    for param in c_params:
        # Reset per parameter so an unmatched name can never silently reuse
        # the configuration resolved for the previous parameter.
        arg = None
        if "self_" in param.name:
            attr = getattr(arg_cfg[0], param.name[5:])
            arg = arr_cfg(attr.shape, attr.dtype)
        else:
            for index, p in enumerate(py_args):
                _id = p.arg if use_arg_attr else p.id
                if _id == param.name:
                    arg = arg_cfg[index]
                    break
        if not isinstance(arg, arr_cfg):
            raise NotImplementedError(
                "unsupported or unmatched parameter: %r" % (param.name, ))
        entry_type += (np.ctypeslib.ndpointer(arg.dtype, len(arg.shape),
                                              arg.shape), )
    entry_type = ct.CFUNCTYPE(*entry_type)
    return ConcreteMeta(files[0].name, proj, entry_type, c_params, py_args)
def finalize(self, transform_result, program_config):
    """Package the first transformed tree as the ``apply_all`` function.

    The single argument type of the entry point is taken from the ``'ptr'``
    entry of ``program_config[0]``; the result type is ``None``.

    Returns a ``BasicFunction`` for the entry point ``"apply_all"``.
    """
    project = Project([transform_result[0]])
    pointer_type = program_config[0]['ptr']
    signature = CFUNCTYPE(None, pointer_type)
    return BasicFunction("apply_all", project, signature)
def finalize(self, files, program_cfg):
    """Create the concrete ``dgemm`` function for the configured size.

    ``program_cfg`` must unpack into two items; only the first is used and
    it supplies ``'n'`` and ``'dtype'``.  The entry point takes three
    2-D ``(n, n)`` array pointers of that dtype followed by a pointer to a
    C double, and has ``None`` as its result type.
    """
    arg_cfg, _tune_cfg = program_cfg
    n = arg_cfg['n']
    dtype = arg_cfg['dtype']
    matrix_ptr = np.ctypeslib.ndpointer(dtype, 2, (n, n))
    arg_types = (matrix_ptr, ) * 3 + (ct.POINTER(ct.c_double), )
    signature = ct.CFUNCTYPE(None, *arg_types)
    return ConcreteMatMul('dgemm', Project(files), signature, self)
def finalize(self, files, program_cfg):
    """Create the concrete ``conv`` function.

    The entry point has ``None`` as its result type and takes, in order,
    the types stored under ``'in_ptr'``, ``'weights'``, ``'bias'`` and
    ``'out'`` in the first item of ``program_cfg``.
    """
    arg_cfg, _tune_cfg = program_cfg
    project = Project(files)
    arg_types = tuple(
        arg_cfg[key] for key in ('in_ptr', 'weights', 'bias', 'out')
    )
    signature = ct.CFUNCTYPE(None, *arg_types)
    return ConvConcrete('conv', project, signature)
def finalize(self, transform_result, program_config):
    """Finalize the ``mandelbrot`` entry point over all transformed files.

    The entry point has ``None`` as its result type and takes two C longs
    followed by a 1-D ``int32`` array pointer whose length is
    ``program_config[0]``.

    Returns whatever ``CSF.finalize`` returns.
    """
    # The value is unused, but the lookup is kept so that a result that
    # cannot be indexed at 0 still fails before the project is built.
    _first_file = transform_result[0]
    project = Project(transform_result)
    output_ptr = np.ctypeslib.ndpointer(np.int32, 1, (program_config[0], ))
    signature = ctypes.CFUNCTYPE(None, ctypes.c_long, ctypes.c_long,
                                 output_ptr)
    return CSF().finalize('mandelbrot', project, signature)
def finalize(self, transform_result, program_config):
    """Package the first transformed tree as the ``apply`` function.

    ``program_config`` must unpack into two items; the first supplies
    ``'arg_type'``, which is used as both the result type and the single
    argument type of the entry point.
    """
    project = Project([transform_result[0]])
    arg_config, _tuner_config = program_config
    value_type = arg_config['arg_type']
    signature = ct.CFUNCTYPE(value_type, value_type)
    return BasicFunction("apply", project, signature)
def finalize(self, transform_result, program_config):
    """Finalize the ``apply_all`` entry point from the first transformed tree.

    ``program_config`` must unpack into two items; the first supplies
    ``'ptr'``, the single argument type of the entry point.  The result
    type is ``None``.

    Returns whatever ``ArrayFn.finalize`` returns.
    """
    project = Project([transform_result[0]])
    arg_config, _tuner_config = program_config
    pointer_type = arg_config['ptr']
    signature = ct.CFUNCTYPE(None, pointer_type)
    return ArrayFn().finalize("apply_all", project, signature)
def finalize(self, transform_result, program_config):
    """Finalize the element-wise array/array operation.

    The array pointer type is derived from the ``dtype``, ``ndim`` and
    ``shape`` of ``program_config[0]`` and is used for all three arguments
    of the entry point; the result type is ``None``.

    Returns whatever ``ConcreteElemWiseArrayArrayOp.finalize`` returns.
    """
    tree = transform_result[0]

    # All three arguments share the type described by the first config item.
    operand = program_config[0]
    array_ptr = np.ctypeslib.ndpointer(operand.dtype, operand.ndim,
                                       operand.shape)
    arg_types = (array_ptr, ) * 3
    signature = CFUNCTYPE(None, *arg_types)

    op = ConcreteElemWiseArrayArrayOp()
    return op.finalize(Project([tree]), FUNC_NAME, signature)
def finalize(self, transform_result, program_config):
    """Finalize the ``dgemm`` entry point from the first transformed tree.

    ``program_config`` must unpack into two items; the first supplies
    ``'n'`` and ``'dtype'``.  The entry point takes three 2-D ``(n, n)``
    array pointers of that dtype followed by ``POINTER(c_double)``, and has
    ``None`` as its result type.

    Returns whatever ``ConcreteDgemm.finalize`` returns.
    """
    project = Project([transform_result[0]])
    arg_config, _tuner_config = program_config
    n = arg_config['n']
    dtype = arg_config['dtype']
    matrix_ptr = np.ctypeslib.ndpointer(dtype, 2, (n, n))
    arg_types = (matrix_ptr, ) * 3 + (POINTER(c_double), )
    signature = ct.CFUNCTYPE(None, *arg_types)
    return ConcreteDgemm().finalize("dgemm", project, signature)
def finalize(self, files, program_cfg):
    """Create the concrete ``im2col`` function and its output shape.

    ``arg_cfg[0]`` must unpack into ``(channels, height, width)`` and
    ``arg_cfg[1]`` is used as the input argument type; its ``_dtype_``
    attribute gives the dtype of the 2-D output pointer.

    The output shape is
    ``(channels * kernel_h * kernel_w, height_col * width_col)`` where each
    ``*_col`` extent is ``(size + 2 * pad - kernel) // stride + 1``.
    """
    arg_cfg, _tune_cfg = program_cfg
    project = Project(files)
    channels, height, width = arg_cfg[0]

    rows_out = (height + 2 * self.pad_h - self.kernel_h) // self.stride_h + 1
    cols_out = (width + 2 * self.pad_w - self.kernel_w) // self.stride_w + 1
    patch_size = channels * self.kernel_h * self.kernel_w
    out_shape = (patch_size, rows_out * cols_out)

    in_type = arg_cfg[1]
    out_ptr = np.ctypeslib.ndpointer(in_type._dtype_, 2, out_shape)
    signature = ct.CFUNCTYPE(None, in_type, out_ptr)
    return ConcreteIm2Col('im2col', project, signature, out_shape)
def finalize(self, transform_result, program_config):
    """Finalize the element-wise array/scalar operation.

    The array pointer type is derived from the ``dtype``, ``ndim`` and
    ``shape`` of ``program_config[0]``; the scalar C type is obtained by
    passing ``np.dtype(scalar_type)`` of the same object to
    ``get_c_type_from_numpy_dtype``.  The entry point takes
    ``(array, scalar, array)`` and has ``None`` as its result type.

    Returns whatever ``ConcreteElemWiseArrayScalarOp.finalize`` returns.
    """
    tree = transform_result[0]

    # Both array arguments share the type described by the first config item.
    operand = program_config[0]
    array_ptr = np.ctypeslib.ndpointer(operand.dtype, operand.ndim,
                                       operand.shape)
    scalar_dtype = np.dtype(operand.scalar_type)
    scalar_ctype = get_c_type_from_numpy_dtype(scalar_dtype)
    signature = CFUNCTYPE(None, array_ptr, scalar_ctype, array_ptr)

    op = ConcreteElemWiseArrayScalarOp()
    return op.finalize(Project([tree]), FUNC_NAME, signature)
def transform(self, py_ast, program_config):
    """Build the fixed OpenMP "hello" C file and its entry point type.

    Neither ``py_ast`` nor ``program_config`` is read.  The generated file
    is named ``"generated"``, includes ``omp.h`` and ``stdio.h``, and
    declares a parameterless ``hello`` function whose body is an
    ``OmpParallel`` node with a ``num_threads`` clause of 4 followed by a
    ``printf`` of the thread number and thread count.

    Returns:
        A ``(Project, entry_point_typesig)`` pair, where the type signature
        is obtained from the ``hello`` declaration found in the tree.
    """
    includes = [CppInclude("omp.h"), CppInclude("stdio.h")]

    return_type = Void()
    parallel_pragma = OmpParallel([OmpNumThreadsClause(Constant(4))])
    greeting = printf(r"Hello from thread %d of %d.\n",
                      omp_get_thread_num(), omp_get_num_threads())
    hello_decl = FunctionDecl(return_type, "hello", params=[],
                              defn=[parallel_pragma, greeting])

    tree = CFile("generated", includes + [hello_decl])

    found_decl = tree.find(FunctionDecl, name="hello")
    entry_point_typesig = found_decl.get_type().as_ctype()
    return Project([tree]), entry_point_typesig
def finalize(self, files, program_cfg):
    """Create the concrete function for ``files[0]`` from the argument configs.

    Each item of the first element of ``program_cfg`` is mapped to a C
    argument type:

    * ``ArrayCfg`` instance -> ``ndpointer`` with its dtype, rank and shape;
    * exact type ``np.float32`` or ``float`` -> ``c_float``;
    * ``int`` instance -> ``c_int``.

    The result type of the entry point is ``None``.

    Raises:
        NotImplementedError: for any other kind of argument config.
    """
    arg_cfg, _tune_cfg = program_cfg

    arg_types = []
    for cfg in arg_cfg:
        if isinstance(cfg, ArrayCfg):
            c_type = np.ctypeslib.ndpointer(cfg.dtype, len(cfg.shape),
                                            cfg.shape)
        elif type(cfg) in {np.float32, float}:
            # Exact type match on purpose: subclasses are not accepted here.
            c_type = ct.c_float
        elif isinstance(cfg, int):
            c_type = ct.c_int
        else:
            raise NotImplementedError()
        arg_types.append(c_type)

    signature = ct.CFUNCTYPE(None, *arg_types)
    return ConcreteFn(files[0].name, Project(files), signature, self.output)
def finalize(self, files, program_cfg):
    """Create the concrete ``col2im`` function.

    ``arg_cfg[1]`` is used as the input argument type; its ``_dtype_``
    attribute gives the dtype of the 3-D output pointer, whose shape is
    ``self.shape``.  The result type of the entry point is ``None``.
    """
    arg_cfg, _tune_cfg = program_cfg
    project = Project(files)
    in_type = arg_cfg[1]
    out_ptr = np.ctypeslib.ndpointer(in_type._dtype_, 3, self.shape)
    signature = ct.CFUNCTYPE(None, in_type, out_ptr)
    return ConcreteCol2Im('col2im', project, signature, self.shape)
def finalize(self, transform_result, program_config):
    """Finalize the ``apply`` entry point from the first transformed item.

    The first transformed item is printed to standard output before the
    project is built.  The entry point has ``None`` as its result type and
    takes the types in ``program_config[0]`` as its arguments.

    Returns whatever ``HwachaFN.finalize`` returns.
    """
    generated = transform_result[0]
    print(generated)
    project = Project([generated])
    arg_types = program_config[0]
    signature = ct.CFUNCTYPE(None, *arg_types)
    return HwachaFN().finalize("apply", project, signature)
def visit_Module(self, node):
    """Visit every statement of ``node.body`` and wrap the results.

    Returns a ``Project`` containing a single ``CFile`` named ``"module"``
    whose body is the list of visited statements, in order.
    """
    visited = []
    for statement in node.body:
        visited.append(self.visit(statement))
    module_file = CFile("module", visited)
    return Project([module_file])
def finalize(self, transform_result, program_config):
    """Build and finalize the concrete stencil function for the backend.

    ``program_config`` must unpack into ``(arg_config, tuner_config)``;
    ``arg_config`` is concatenated with a 1-tuple, so it must be a tuple.
    ``self.output`` is set from ``self.generate_output(program_config)``
    for the duration of the call and reset to ``None`` on the way out,
    together with ``self.fusable_nodes`` (reset to ``[]``).

    When ``self.backend`` is ``StencilOclTransformer`` the entry point is
    ``"stencil_control"``: every ``OclFile`` (or only the first one found,
    when the kernel is not copied) is built with
    ``clCreateProgramWithSource`` and the resulting kernel(s) are passed to
    ``OclStencilFunction.finalize``.  Otherwise the entry point is
    ``"stencil_kernel"`` and ``ConcreteStencil.finalize`` is used.

    Returns whatever the selected concrete function's ``finalize`` returns.
    """
    project = Project(transform_result)
    arg_config, tuner_config = program_config
    self.output = self.generate_output(program_config)
    param_types = [
        np.ctypeslib.ndpointer(arg.dtype, arg.ndim, arg.shape)
        for arg in arg_config + (self.output, )
    ]

    if self.backend == StencilOclTransformer:
        entry_point = "stencil_control"
        entry_type = [c_int32, cl.cl_command_queue, cl.cl_kernel]
        if self.kernel.is_copied:
            # One additional kernel argument per kernel dimension.
            entry_type.extend(cl.cl_kernel for _ in range(self.kernel.dim))
        # One buffer per input argument plus one for the output.
        entry_type.extend(cl_mem for _ in range(len(arg_config) + 1))
        entry_type = CFUNCTYPE(*entry_type)

        concrete_function = OclStencilFunction()
        if self.kernel.is_copied:
            kernels = []
            for index, kernel in enumerate(project.find_all(OclFile)):
                source = kernel.codegen()
                print("Kernel Codegen\n{}".format(source))
                program = clCreateProgramWithSource(
                    concrete_function.context, source).build()
                # The first file's kernel is looked up as 'stencil_kernel';
                # every later file's kernel is looked up by the file's name.
                kernel_name = 'stencil_kernel' if index == 0 else kernel.name
                kernels.append(program[kernel_name])
            finalized = concrete_function.finalize(
                project, entry_type, entry_point, kernels, self.output
            )
        else:
            kernel = project.find(OclFile)
            program = clCreateProgramWithSource(
                concrete_function.context, kernel.codegen()).build()
            finalized = concrete_function.finalize(
                project, entry_type, entry_point,
                program['stencil_kernel'], self.output
            )
    else:
        entry_point = "stencil_kernel"
        param_types.append(POINTER(c_float))
        entry_type = CFUNCTYPE(c_int32, *param_types)
        concrete_function = ConcreteStencil()
        finalized = concrete_function.finalize(project, entry_point,
                                               entry_type, self.output)

    self.output = None
    self.fusable_nodes = []
    return finalized