def transform(self, tree, program_config):
    """Build the OpenCL kernel file for this specializer.

    The generated kernel has one work-item per ``(x, y)`` pair taken from
    ``get_global_id(0)`` / ``get_global_id(1)``.  Each work-item reads
    ``input`` at the halved coordinates ``(x/2, y/2)`` with weight ``.5``,
    adds one neighbour along x and one along y with weight ``.25`` each
    (``+1`` when the coordinate is odd, ``-1`` when it is even), and stores
    the sum at ``output[x * len_y + y]``.  All halved coordinates are
    clamped to ``[0, len/2 - 1]``.

    Args:
        tree: Not used; the kernel is generated from an embedded snippet.
        program_config: Sequence whose first element carries the extents:
            ``program_config[0][0][1][0]`` is substituted for ``len_x`` and
            ``program_config[0][0][1][1]`` for ``len_y``.
            NOTE(review): presumably this is the shape of the first
            argument -- confirm against ``args_to_subconfig``.

    Returns:
        The ``OclFile`` holding the kernel after ``PyBasicConversions`` has
        been applied.  As a side effect ``self.entry_point`` is set to the
        kernel's freshly generated name.
    """
    shape_cfg = program_config[0]
    self.entry_point = unique_kernel_name()
    elem_type = c_float
    dim_x = shape_cfg[0][1][0]
    dim_y = shape_cfg[0][1][1]
    out_name = unique_name()

    # Python-syntax kernel body; it is parsed with ``ast`` and lowered to C
    # below.  ``output``, ``len_x`` and ``len_y`` are placeholders replaced
    # by the conversion pass.
    kernel_src = \
        """
temp = .5 * input[clamp(x/2, 0, (len_x / 2) - 1) * len_y +
                  clamp(y/2, 0, (len_y / 2) - 1)]
if (x & 0x1):
    temp += .25 * input[clamp(x/2 + 1, 0, (len_x / 2) - 1) * len_y +
                        clamp(y/2, 0, (len_y / 2) - 1)]
else:
    temp += .25 * input[clamp(x/2 - 1, 0, (len_x / 2) - 1) * len_y +
                        clamp(y/2, 0, (len_y / 2) - 1)]
if (y & 0x1):
    temp += .25 * input[clamp(x/2, 0, (len_x / 2) - 1) * len_y +
                        clamp(y/2 + 1, 0, (len_y / 2) - 1)]
else:
    temp += .25 * input[clamp(x/2, 0, (len_x / 2) - 1) *len_y +
                        clamp(y/2 - 1, 0, (len_y / 2) - 1)]
output[x * len_y + y] = temp
"""
    parsed_statements = ast.parse(kernel_src).body

    # First mapping renames symbols, second one substitutes constants.
    converter = PyBasicConversions(
        {'output': out_name},
        {'len_x': dim_x, 'len_y': dim_y},
    )

    signature = [
        SymbolRef("input", POINTER(elem_type)(), _global=True, _const=True),
        SymbolRef(out_name, POINTER(elem_type)(), _global=True),
    ]
    statements = [
        Assign(SymbolRef('x', c_int()), get_global_id(0)),
        Assign(SymbolRef('y', c_int()), get_global_id(1)),
        Assign(SymbolRef('temp', elem_type()), Constant(0)),
    ]
    statements += parsed_statements

    kernel_decl = FunctionDecl(None, self.entry_point, signature, statements)
    kernel_decl.set_kernel()
    return converter.visit(OclFile("kernel", [kernel_decl]))
def transform(self, tree, program_config):
    """Generate, compile and package the init and compute OpenCL kernels.

    Two kernels are produced, both indexed by ``x = get_global_id(0)`` and
    ``y = get_global_id(1)``:

    * *init* copies ``input[y * len_x + x]`` into the output buffer at the
      same offset.
    * *compute* writes slice ``power + 1`` of the output buffer (slices are
      ``base_size`` elements apart) as a weighted sum of slice ``power``:
      ``0.1`` for each of the ``y-1`` / ``y+1`` neighbours, ``0.4`` for each
      of the ``x-1`` / ``x+1`` neighbours and ``1.0`` for the centre.  Every
      coordinate is clamped to ``[border, len - border - 1]``.

    Args:
        tree: Not used; the kernels are generated from embedded snippets.
        program_config: Sequence whose first element exposes ``base_shape``
            (indexed as ``[0]`` -> ``len_y`` and ``[1]`` -> ``len_x``) and
            ``border``.

    Returns:
        Whatever ``OclMatrixPowers().finalize`` returns for the two compiled
        kernel handles and the global size ``(len_x, len_y)``.
    """
    call_args = program_config[0]
    len_y = call_args.base_shape[0]
    len_x = call_args.base_shape[1]
    base_size = len_y * len_x
    border = call_args.border

    transformer = PyBasicConversions()
    output = unique_name()

    def _emit_kernel(label, entry_point, params, source):
        """Lower a Python-syntax kernel body to OpenCL and return its code."""
        print(source)
        statements = [
            Assign(SymbolRef('x', c_int()), get_global_id(0)),
            Assign(SymbolRef('y', c_int()), get_global_id(1)),
        ]
        statements.extend(ast.parse(source).body)
        kernel_decl = FunctionDecl(None, entry_point, params, statements)
        kernel_decl.set_kernel()
        kernel_file = transformer.visit(OclFile('kernel', [kernel_decl]))
        print(label)
        generated = kernel_file.codegen()
        print(generated)
        return generated

    # ---- init kernel -------------------------------------------------
    init_entry_point = unique_kernel_name()
    init_params = [
        SymbolRef('input', POINTER(c_float)(), _global=True, _const=True),
        SymbolRef(output, POINTER(c_float)(), _global=True),
    ]
    # The row stride must be the x extent (base_shape[1]) so that the layout
    # agrees with the compute kernel and with the (len_x, len_y) launch size
    # passed to finalize(); using base_shape[0] here mis-indexes non-square
    # inputs.
    init_src = """{output}[y * {len_x} + x] = input[y * {len_x} + x]""".format(
        output=output,
        len_x=len_x,
    )
    init_code = _emit_kernel(
        "init kernel codegen", init_entry_point, init_params, init_src
    )

    # ---- compute kernel ----------------------------------------------
    compute_entry_point = unique_kernel_name()
    compute_params = [
        SymbolRef(output, POINTER(c_float)(), _global=True),
        SymbolRef('power', c_int(), _const=True),
    ]
    compute_src = """{matrix}[(power+1) * {base_size} + y * {len_x} + x] =
        0.1 * {matrix}[
            power * {base_size} +
            clamp(y-1, {border}, {len_y}-{border}-1) * {len_x} +
            clamp(x, {border}, {len_x}-{border}-1)
        ] +
        0.1 * {matrix}[
            power * {base_size} +
            clamp(y+1, {border}, {len_y}-{border}-1) * {len_x} +
            clamp(x, {border}, {len_x}-{border}-1)
        ] +
        0.4 * {matrix}[
            power * {base_size} +
            clamp(y, {border}, {len_y}-{border}-1) * {len_x} +
            clamp(x-1, {border}, {len_x}-{border}-1)
        ] +
        0.4 * {matrix}[
            power * {base_size} +
            clamp(y, {border}, {len_y}-{border}-1) * {len_x} +
            clamp(x+1, {border}, {len_x}-{border}-1)
        ] +
        1.0 * {matrix}[
            power * {base_size} +
            clamp(y, {border}, {len_y}-{border}-1) * {len_x} +
            clamp(x, {border}, {len_x}-{border}-1)
        ]
    """.format(
        matrix=output,
        base_size=base_size,
        len_y=len_y,
        len_x=len_x,
        border=border,
    )
    # Collapse the multi-line template to a single logical line so that
    # ast.parse accepts it as one assignment statement.
    compute_src = re.sub(r"\s+", " ", compute_src)
    compute_code = _emit_kernel(
        "compute kernel codegen", compute_entry_point, compute_params,
        compute_src
    )

    # ---- compile and hand off ----------------------------------------
    fn = OclMatrixPowers()
    init_program = clCreateProgramWithSource(fn.context, init_code).build()
    init_ptr = init_program[init_entry_point]
    compute_program = clCreateProgramWithSource(
        fn.context, compute_code
    ).build()
    compute_ptr = compute_program[compute_entry_point]
    return fn.finalize(init_ptr, compute_ptr, (len_x, len_y))