def transform(self, tree, program_config):
    """Lower ``tree`` into a ``CFile`` containing a single ``kernel`` function.

    ``program_config`` is unpacked as ``(subconfig, tuning_config)``; only
    ``subconfig`` (a mapping of argument name to argument) is used here.

    Returns a ``CFile`` whose body is the ``stdint.h`` include, one encode
    macro per distinct argument shape, and the generated ``kernel``.
    """
    subconfig, _tuning_config = program_config
    shape_by_name = {name: arg.shape for name, arg in subconfig.items()}
    distinct_shapes = set(shape_by_name.values())

    # Run the parent class's IndexOpToEncode visitor first, then convert the
    # (possibly mutated) Python tree with PyBasicConversions.
    self.parent_cls.IndexOpToEncode(shape_by_name).visit(tree)
    c_tree = PyBasicConversions().visit(tree)

    # One encode macro per distinct shape among the arguments.
    encode_funcs = [
        generate_encode_macro('encode' + CCompiler._shape_to_str(shape), shape)
        for shape in distinct_shapes
    ]

    def lower(target, ispace):
        # Fill, expand and block-convert one iteration space for ``target``.
        target_shape = subconfig[target].shape
        lowered = fill_iteration_spaces(ispace, target_shape)
        expander = self.parent_cls.IterationSpaceExpander(
            self.index_name, target_shape)
        lowered = expander.visit(lowered)
        return self.parent_cls.BlockConverter().visit(lowered)

    components = [
        lower(target, ispace)
        for target, ispace in zip(self.target_names, c_tree.body)
    ]

    params = []
    for arg_name, arg in subconfig.items():
        # ndarrays are flattened before their C type is computed.
        typed_as = arg.ravel() if isinstance(arg, np.ndarray) else arg
        params.append(
            SymbolRef(name=arg_name, sym_type=get_ctype(typed_as),
                      _restrict=True))

    c_func = FunctionDecl(
        name=SymbolRef("kernel"),
        params=params,
        defn=components
    )
    TilingOptimization().optimize(c_func, self.tile_size)

    includes = [CppInclude("stdint.h")]
    return CFile(body=includes + encode_funcs + [c_func])
def visit_FunctionDef(self, node):
    """Visit the function body and rebuild its parameter list with C types.

    ``self.decls`` is reset, every statement of ``node.body`` is visited
    (results stored in ``node.defn``), and ``node.args.args`` is replaced by
    ``C.SymbolRef`` parameters typed from ``self.symbol_table``; a parameter
    named ``'self'`` is dropped.  Afterwards each entry of ``self.decls``
    becomes either an extra ndpointer parameter (``Array`` values) or an
    assignment inserted at the front of ``node.body``.

    Returns the same ``node``.
    """
    self.decls = {}
    node.defn = [self.visit(stmt) for stmt in node.body]

    params = []
    for param in node.args.args:
        # Python 3 argument nodes expose ``.arg``; Python 2 ones use ``.id``.
        arg_name = param.arg if sys.version_info > (3, 0) else param.id
        if arg_name == 'self':
            continue

        bound = self.symbol_table[arg_name]
        if isinstance(bound, Array):
            c_type = np.ctypeslib.ndpointer(
                bound.dtype, bound.ndim, bound.shape)()
        else:
            c_type = get_ctype(bound)
        params.append(C.SymbolRef(arg_name, c_type))

    for name, value in self.decls.items():
        if isinstance(value, Array):
            c_type = np.ctypeslib.ndpointer(
                value.dtype, value.ndim, value.shape)()
            # The original reads the data address here without using it;
            # the attribute access is kept so behaviour is unchanged.
            _data_address = value.ctypes.data
            params.append(C.SymbolRef(name, c_type))
            continue

        # Python booleans are emitted as the int constants 1 / 0.
        if value is True:
            literal, c_type = 1, ct.c_int()
        elif value is False:
            literal, c_type = 0, ct.c_int()
        else:
            literal, c_type = value, get_ctype(value)
        node.body.insert(
            0, C.Assign(C.SymbolRef(name, c_type), C.Constant(literal)))

    node.args.args = params
    return node
def visit_FunctionDef(self, node):
    """Type the function's parameters and materialise ``self.decls``.

    Steps, in order:

    1. Reset ``self.decls`` and visit each statement in ``node.body``,
       storing the results in ``node.defn``.
    2. Build a ``C.SymbolRef`` for every argument except ``'self'``, typed
       from the value found in ``self.symbol_table``.
    3. For each ``self.decls`` entry, append an ndpointer parameter when the
       value is an ``Array``; otherwise insert an assignment of the constant
       at the front of ``node.body``.

    Returns ``node`` with ``node.args.args`` replaced by the new parameters.
    """
    def ndpointer_instance(array):
        # Instantiate the ndpointer type matching this array's layout.
        return np.ctypeslib.ndpointer(
            array.dtype, array.ndim, array.shape)()

    self.decls = {}
    node.defn = [self.visit(child) for child in node.body]

    typed_params = []
    for param in node.args.args:
        if sys.version_info > (3, 0):
            ident = param.arg
        else:
            ident = param.id
        if ident == 'self':
            continue
        looked_up = self.symbol_table[ident]
        if isinstance(looked_up, Array):
            sym_type = ndpointer_instance(looked_up)
        else:
            sym_type = get_ctype(looked_up)
        typed_params.append(C.SymbolRef(ident, sym_type))

    for decl_name, decl_value in self.decls.items():
        if isinstance(decl_value, Array):
            sym_type = ndpointer_instance(decl_value)
            # Address is read but unused, exactly as in the original code.
            decl_value = decl_value.ctypes.data
            typed_params.append(C.SymbolRef(decl_name, sym_type))
        else:
            if decl_value is True:
                # ``True`` becomes the C int constant 1.
                decl_value = 1
                sym_type = ct.c_int()
            elif decl_value is False:
                # ``False`` becomes the C int constant 0.
                decl_value = 0
                sym_type = ct.c_int()
            else:
                sym_type = get_ctype(decl_value)
            assignment = C.Assign(
                C.SymbolRef(decl_name, sym_type), C.Constant(decl_value))
            node.body.insert(0, assignment)

    node.args.args = typed_params
    return node
def visit_For(self, node):
    """Rewrite a loop tagged ``ivdep`` into a block that launches inline asm.

    For a loop whose ``pragma`` equals ``"ivdep"`` this:

    * assigns a ``va<N>`` register name to each distinct array reference and
      a ``vs<N>`` register name to each scalar found in the loop body,
      recording the scalar's C type in ``self.type_map``;
    * changes the loop increment to ``loopvar += vector_length``;
    * appends the result of ``get_asm_body`` to ``self.defns``;
    * replaces the loop body with templates plus an ``__asm__ volatile``
      statement assembled from the register maps.

    Returns a list: configure template, the modified loop, and a trailing
    fence template.  For any other pragma nothing is returned explicitly.
    NOTE(review): the non-``ivdep`` fall-through is assumed from the visible
    text, since ``block`` is only bound inside the ``if`` — confirm.
    """
    if node.pragma == "ivdep":
        loopvar = node.incr.arg
        size = node.test.right
        scalars = get_scalars_in_body(node)
        refs = get_array_references_in_body(node)

        # Keyed by str(ref), so textually equal references share one entry.
        ref_register_map = {
            str(ref): (ref, "va{}".format(position))
            for position, ref in enumerate(refs)
        }
        scalar_register_map = {}
        for position, scalar in enumerate(scalars):
            register = "vs{}".format(position)
            scalar_register_map[scalar] = register
            self.type_map[register] = get_ctype(scalar)

        block = [StringTemplate(hwacha_configure_block.format(SIZE=size))]

        # The increment is changed before get_asm_body sees the node, and the
        # body is replaced only afterwards; this order matches the original.
        node.incr = C.AddAssign(loopvar, C.SymbolRef("vector_length"))
        self.defns.append(
            get_asm_body(node, scalar_register_map, ref_register_map,
                         self.type_map))
        block.append(node)

        body = [StringTemplate(bounds_check.format(SIZE=size, loopvar=loopvar))]
        body.extend(scalar_init(scalar) for scalar in scalars)
        body.append(
            StringTemplate(
                obtained_vector_length.format(SIZE=size, loopvar=loopvar)))

        # Instruction lines and their operand constraints share one running
        # operand index: array references first, then scalars.
        instructions = []
        operands = []
        for slot, (ref, register) in enumerate(ref_register_map.values()):
            instructions.append(
                "\t \"vmsa {0}, %{1}\\n\"\n".format(register, slot))
            operands.append(
                "\"r\"({0} + {1}),\n".format(ref.left.name, ref.right.name))

        first_scalar_slot = len(ref_register_map)
        for slot, (scalar, register) in enumerate(
                scalar_register_map.items(), first_scalar_slot):
            instructions.append(
                "\t \"vmss {0}, %{1}\\n\"\n".format(register, slot))
            operands.append(
                "\"r\"({0}.i),\n".format(
                    "".join(number_dict[digit] for digit in str(scalar))))

        # The last operand index is used for the address of __hwacha_body.
        final_slot = first_scalar_slot + len(scalar_register_map)
        instructions.append("\"fence\\n\"\n")
        instructions.append("\"vf 0(%{0})\\n\"\n".format(final_slot))
        operands.append("\"r\" (&__hwacha_body)")

        block1 = "".join(instructions)
        block2 = "".join(operands)
        body.append(StringTemplate(
            """ __asm__ volatile( {block1} : : {block2} : "memory" ); """.format(block1=block1, block2=block2)))

        node.body = body
        block.append(
            StringTemplate(""" __asm__ volatile( "fence\\n" ); """))
        return block
def visit_Constant(self, node):
    """Record the constant's C type and return its ``self.scalars`` entry.

    The type from ``get_ctype(node.value)`` is stored in ``self.type_map``
    under the constant's value; the return value is ``self.scalars`` indexed
    by that same value.
    """
    constant = node.value
    self.type_map[constant] = get_ctype(constant)
    return self.scalars[constant]
def get_type(self, env=None):
    """Return ``get_ctype`` applied to ``self.value``.

    ``env`` is accepted but not used by this implementation.
    """
    held_value = self.value
    return get_ctype(held_value)
def test_bad_type(self):
    """``get_ctype`` raises ``ValueError`` for an instance of a plain class."""
    class Bad(object):
        pass

    unsupported = Bad()
    with self.assertRaises(ValueError):
        get_ctype(unsupported)
def test_bool(self):
    """``get_ctype(True)`` is an instance of ``ctypes.c_bool``."""
    self.assertIsInstance(get_ctype(True), ctypes.c_bool)
def test_char(self):
    """A one-character string maps to an instance of ``ctypes.c_char``."""
    self.assertIsInstance(get_ctype("c"), ctypes.c_char)
def args_to_subconfig(self, args):
    """Build a subconfig holding the C type class of every argument.

    Returns a dict with the single key ``'arg_typesig'`` mapped to a tuple
    of ``type(get_ctype(arg))`` for each element of ``args``, in order.
    """
    type_signature = []
    for arg in args:
        type_signature.append(type(get_ctype(arg)))
    return {'arg_typesig': tuple(type_signature)}
def test_int_array_1d(self):
    """A symbol typed from a 1-D int32 array generates ``int* i``."""
    data = np.arange(10, dtype=np.int32)
    symbol = SymbolRef("i", get_ctype(data))
    self._check_code(symbol, "int* i")
def test_int_array_2d(self):
    """A symbol typed from a 2x5 float32 array generates ``float** i``.

    Despite the test's name, the input array is float32, not an int type.
    """
    data = np.arange(10, dtype=np.float32).reshape(2, 5)
    symbol = SymbolRef("i", get_ctype(data))
    self._check_code(symbol, "float** i")
def test_int_array(self):
    """The type for a 1-D int32 array is an instance of ``_ctypes.Array``."""
    data = np.arange(10, dtype=np.int32)
    self.assertIsInstance(get_ctype(data), _ctypes.Array)
def test_int(self):
    """A Python int maps to an instance of ``ctypes.c_long``."""
    self.assertIsInstance(get_ctype(123), ctypes.c_long)
def test_float(self):
    """A Python float maps to an instance of ``ctypes.c_double``."""
    self.assertIsInstance(get_ctype(456.7), ctypes.c_double)
def get_type(self):
    """Return ``get_ctype`` applied to ``self.value``."""
    held_value = self.value
    return get_ctype(held_value)
def test_none(self):
    """``get_ctype(None)`` is an instance of ``NoneType``."""
    none_type = type(None)
    self.assertIsInstance(get_ctype(None), none_type)
def args_to_subconfig(self, args):
    """Build a subconfig from the C type class of the first argument.

    Returns a dict with the single key ``'arg_type'`` mapped to
    ``type(get_ctype(args[0]))``; the remaining arguments are ignored.
    """
    first_arg = args[0]
    first_ctype = get_ctype(first_arg)
    return {'arg_type': type(first_ctype)}
def test_string(self):
    """Multi-character, empty and spaced strings map to ``ctypes.c_char_p``."""
    # Checked in the same order as the original three assertions.
    for sample in ("foo", "", "one two"):
        self.assertIsInstance(get_ctype(sample), ctypes.c_char_p)
def test_int_array_2d(self):
    """A 2x5 float32 array type renders as ``float** i`` on a symbol.

    The array is float32 even though the test name mentions ``int``.
    """
    flat = np.arange(10, dtype=np.float32)
    array_type = get_ctype(flat.reshape(2, 5))
    self._check_code(SymbolRef("i", array_type), "float** i")