def alloc_array(self, elt_t, dims, name = "array", explicit_struct = False):
    """
    Emit code that allocates an array with elements of type `elt_t`.

    `dims` may be a tuple expression (as judged by self.is_tuple), a Python
    list/tuple of dimension expressions, or a single dimension expression.

    With explicit_struct = False an AllocArray node over the shape tuple is
    created.  With explicit_struct = True the data buffer is allocated
    directly and the array is assembled as a Struct of
    [data pointer, shape, strides, zero_i64, element count], with strides
    computed for a row-major layout (the only layout supported for now).

    Returns whatever self.assign_temp yields for the new array under `name`.
    """
    if self.is_tuple(dims):
        # Already a tuple expression: reuse it as the shape, pull out its parts
        shape, dims = dims, self.tuple_elts(dims)
    else:
        if not isinstance(dims, (list, tuple)):
            # A lone dimension is treated as a rank-1 shape
            dims = [dims]
        shape = self.tuple(dims, "shape", explicit_struct = explicit_struct)

    array_t = array_type.make_array_type(elt_t, len(dims))

    if not explicit_struct:
        return self.assign_temp(AllocArray(shape, type = array_t), name)

    nelts = self.prod(dims, name = "nelts")
    ptr_t = core_types.ptr_type(elt_t)
    data_ptr = self.assign_temp(Alloc(elt_t, nelts, type = ptr_t), "data_ptr")

    # assume row-major for now!
    # The innermost stride is 1; walking the trailing dimensions from last
    # to second, each new outer stride is the current outermost stride
    # multiplied by that dimension.
    stride_elts = [syntax_helpers.const(1)]
    for dim in reversed(dims[1:]):
        stride_elts.insert(0, self.mul(stride_elts[0], dim, "dim"))
    strides = self.tuple(stride_elts, "strides", explicit_struct = True)

    fields = [data_ptr, shape, strides, zero_i64, nelts]
    return self.assign_temp(Struct(fields, type = array_t), name)
def _create_wrapper(self, n_pos, static_pairs, dynamic_keywords):
    """
    Build a syntax.Fn which wraps a call to self.f.

    n_pos            -- number of positional inputs; each gets a fresh
                        "input_<i>" local name
    static_pairs     -- iterable of (name, value) pairs passed to self.f as
                        keyword arguments; a value of None is skipped, a
                        syntax.Expr must be a syntax.Const, and anything else
                        must satisfy syntax_helpers.is_python_constant
    dynamic_keywords -- names of keyword arguments which become formal
                        parameters of the wrapper

    The wrapper body is a single Return of self.f applied to the generated
    variables.
    """
    formals = FormalArgs()

    positional = []
    for pos in xrange(n_pos):
        fresh_name = names.fresh("input_%d" % pos)
        formals.add_positional(fresh_name)
        positional.append(syntax.Var(fresh_name))

    by_keyword = {}
    for kw in dynamic_keywords:
        # Registered under a fresh local name, with the visible keyword
        # passed as the second argument of add_positional
        fresh_name = names.fresh(kw)
        formals.add_positional(fresh_name, kw)
        by_keyword[kw] = syntax.Var(fresh_name)

    for static_name, static_value in static_pairs:
        if static_value is None:
            # None means "no value given": leave the keyword out entirely
            continue
        if isinstance(static_value, syntax.Expr):
            assert isinstance(static_value, syntax.Const)
            by_keyword[static_name] = static_value
            continue
        assert syntax_helpers.is_python_constant(static_value), \
            "Unexpected type for static/staged value: %s : %s" % \
            (static_value, type(static_value))
        by_keyword[static_name] = syntax_helpers.const(static_value)

    call_result = self.f(*positional, **by_keyword)
    body = [syntax.Return(call_result)]
    base_name = "%s_wrapper_%d_%d" % \
        (self.name, n_pos, len(dynamic_keywords))
    return syntax.Fn(name = names.fresh(base_name), args = formals, body = body)
def value_to_syntax(v):
    """
    Convert a Python value into static syntax.

    - values accepted by syntax_helpers.is_python_constant go through
      syntax_helpers.const
    - a numpy dtype becomes a one-argument function (formal "x") whose body
      returns its argument cast to core_types.from_dtype(v)
    - anything else must satisfy is_function_value and is handed to
      translate_function_value; otherwise an AssertionError is raised
    """
    if syntax_helpers.is_python_constant(v):
        return syntax_helpers.const(v)

    if not isinstance(v, np.dtype):
        assert is_function_value(v), \
            "Can't make value %s : %s into static syntax" % (v, type(v))
        return translate_function_value(v)

    # dtype case: synthesize a function which casts its single argument
    arg_name = names.fresh("x")
    cast_name = names.fresh("cast")
    cast_formals = FormalArgs()
    cast_formals.add_positional(arg_name, "x")
    operand = syntax.Var(arg_name)
    target_t = core_types.from_dtype(v)
    cast_body = [syntax.Return(syntax.Cast(operand, type = target_t))]
    return syntax.Fn(cast_name, cast_formals, cast_body)
def transform_Array(self, expr):
    """
    Lower an array literal.

    A rank-1 literal stays a syntax.Array, with every element passed through
    self.coerce_expr using the array's element type.  For higher ranks an
    output array is allocated via self.alloc_array (outer dimension = number
    of elements, remaining dimensions read from the first element's shape)
    and each element is assigned into its index of that array.
    """
    array_t = expr.type
    elt_t = array_t.elt_type
    rank = array_t.rank
    assert rank > 0

    if rank == 1:
        coerced = [self.coerce_expr(elt, elt_t) for elt in expr.elts]
        return syntax.Array(coerced, type = array_t)

    # need to allocate an output array and copy the elements in
    first_elt = self.assign_temp(expr.elts[0], "first_elt")
    inner_dims = tuple(self.shape(first_elt, axis)
                       for axis in xrange(rank - 1))
    all_dims = (syntax_helpers.const(len(expr.elts)),) + inner_dims
    array = self.alloc_array(elt_t, all_dims, "array_literal")

    for position, elt in enumerate(expr.elts):
        # transform indexing to make missing indices explicit
        target = self.index(array, position, temp = False)
        self.assign(target, elt)
    return array
def transform_TiledReduce(self, expr):
    """
    Lower a TiledReduce expression into explicit loops over tiles.

    Steps, in the order the code is emitted:
      1. The iteration count is the extent of the first argument along the
         first axis.
      2. The tile size is either a constant taken from self.fixed_tile_sizes
         (when expr.fixed_tile_size is set) or an element of
         self.tile_sizes_param; the matching index counter on self is
         incremented first.
      3. The map and combine functions are transformed with
         self.transform_TypedFn.
      4. The result is self.output_var when one is set and the accumulator is
         not a scalar, otherwise a freshly created output array.
      5. The initial accumulator is filled with zero_f64 by nested loops.
      6. One loop over tiles is emitted, or two loops (full tiles, then a
         step-1 remainder) when the tile size is fixed, greater than 1 and
         config.opt_reg_tiles_not_tile_size_dependent is on.

    Returns the result variable/array.
    """
    args = expr.args
    axes = expr.axes

    # TODO: Should make sure that all the shapes conform here,
    # but we don't yet have anything like assertions or error handling.
    niters = self.shape(args[0], syntax_helpers.unwrap_constant(axes[0]))

    if expr.fixed_tile_size:
        # Fixed tiles consume the next entry of self.fixed_tile_sizes
        self.fixed_idx += 1
        tile_size = syntax_helpers.const(self.fixed_tile_sizes[self.fixed_idx])
    else:
        # Otherwise the tile size is read from the tile sizes parameter at
        # the next nesting index, and the function is flagged as tiled
        self.tiling = True
        self.fn.has_tiles = True
        self.nesting_idx += 1
        tile_size = self.index(self.tile_sizes_param, self.nesting_idx,
                               temp = True, name = "tilesize")

    slice_t = array_type.make_slice_type(Int64, Int64, Int64)

    untiled_map_fn = self.get_fn(expr.fn)
    acc_type = untiled_map_fn.return_type
    # Anything which is not a ScalarT is treated as an array accumulator
    acc_is_array = not isinstance(acc_type, ScalarT)

    tiled_map_fn = self.transform_TypedFn(untiled_map_fn)
    map_closure_args = [self.get_closure_arg(e)
                        for e in self.closure_elts(expr.fn)]

    untiled_combine = self.get_fn(expr.combine)
    # NOTE(review): closure arguments of the combine function are never
    # collected; this list is always empty -- confirm that is intended.
    combine_closure_args = []

    tiled_combine = self.transform_TypedFn(untiled_combine, acc_is_array)
    if self.output_var and acc_is_array:
        result = self.output_var
    else:
        shape_args = map_closure_args + args
        result = self._create_output_array(untiled_map_fn, shape_args, [],
                                           "loop_result")
    init = result
    rslt_t = result.type

    if not acc_is_array:
        # Scalar accumulators get a separate variable holding the value
        # before the loop; it is paired with result_after in `merge` below
        result_before = self.fresh_var(rslt_t, "result_before")
        init = result_before

    # Lift the initial value and fill it.
    # init_unpack(i, cur) returns a statement which assigns zero_f64 to `cur`
    # when i == 0, and otherwise a loop over axis 0 of `cur` wrapping the
    # statement for i - 1.
    # NOTE(review): only the rank of expr.init is used below, not its value;
    # the accumulator always starts from zero_f64 -- confirm.
    def init_unpack(i, cur):
        if i == 0:
            return syntax.Assign(cur, syntax_helpers.zero_f64)
        else:
            j = self.fresh_i64("j")
            start = zero_i64
            stop = self.shape(cur, 0)

            self.blocks.push()
            n = self.index_along_axis(cur, 0, j)
            self.blocks += init_unpack(i-1, n)
            body = self.blocks.pop()

            return syntax.ForLoop(j, start, stop, one_i64, body, {})
    # Number of loop levels to generate: rank difference between the
    # accumulator being filled and expr.init
    num_exps = array_type.get_rank(init.type) - \
               array_type.get_rank(expr.init.type)

    # TODO: Get rid of this when safe to do so.
    # The trailing `or True` makes this condition always hold
    if not expr.fixed_tile_size or True:
        self.comment("TiledReduce in %s: init_unpack" % self.fn.name)
        self.blocks += init_unpack(num_exps, init)

    # Loop over the remaining tiles.
    merge = {}
    if not acc_is_array:
        # Passed as the last argument of the ForLoop built in make_loop:
        # maps the result's name to its (before, after) variables
        result_after = self.fresh_var(rslt_t, "result_after")
        merge[result.name] = (result_before, result_after)

    # Builds one ForLoop from `start` to `stop` in increments of `step`.
    # Each iteration slices every argument along its axis with
    # Slice(i, i + step), the upper bound being clamped to `stop` when
    # do_min is set, inlines the tiled map function on those slices and then
    # inlines the combine function.
    def make_loop(start, stop, step, do_min = True):
        i = self.fresh_var(niters.type, "i")

        self.blocks.push()
        slice_stop = self.add(i, step, "next_bound")
        slice_stop_min = self.min(slice_stop, stop) if do_min \
                         else slice_stop

        tile_bounds = syntax.Slice(i, slice_stop_min, one_i64, type = slice_t)
        nested_args = [self.index_along_axis(arg, axis, tile_bounds)
                       for arg, axis in zip(args, axes)]

        new_acc = self.fresh_var(tiled_map_fn.return_type, "new_acc")
        self.comment("TiledReduce in %s: map_fn " % self.fn.name)
        do_inline(tiled_map_fn,
                  map_closure_args + nested_args,
                  self.type_env,
                  self.blocks.top(),
                  result_var = new_acc)

        # The loop body is popped here; the combine call below is inlined
        # directly into the popped block.
        loop_body = self.blocks.pop()
        if acc_is_array:
            # Full slice over every axis of the result, passed to combine as
            # a third argument
            # NOTE(review): self.tuple / self.index / self.comment run after
            # the pop, so anything they emit presumably lands in the
            # enclosing block rather than in loop_body -- confirm.
            outidx = self.tuple([syntax_helpers.slice_none] * result.type.rank)
            result_slice = self.index(result, outidx, temp = False)
            self.comment("")
            do_inline(tiled_combine,
                      combine_closure_args + [result, new_acc, result_slice],
                      self.type_env,
                      loop_body,
                      result_var = None)
        else:
            do_inline(tiled_combine,
                      combine_closure_args + [result, new_acc],
                      self.type_env, loop_body,
                      result_var = result_after)
        return syntax.ForLoop(i, start, stop, step, loop_body, merge)

    assert isinstance(tile_size, syntax.Expr), "%s not an expr" % tile_size

    self.comment("TiledReduce in %s: combine" % self.fn.name)

    if expr.fixed_tile_size and \
       config.opt_reg_tiles_not_tile_size_dependent and \
       syntax_helpers.unwrap_constant(tile_size) > 1:
        # Two loops, neither clamping its slice bound: the first covers
        # [0, num_tiles * tile_size) in steps of tile_size, the second
        # starts from the first loop's variable and runs to niters in
        # steps of one.
        num_tiles = self.div(niters, tile_size, "num_tiles")
        tile_stop = self.mul(num_tiles, tile_size, "tile_stop")
        loop1 = make_loop(zero_i64, tile_stop, tile_size, False)
        self.blocks.append(loop1)
        loop2_start = self.assign_temp(loop1.var, "loop2_start")
        self.blocks.append(make_loop(loop2_start, niters, one_i64,
                                     False))
    else:
        # Single loop; the last tile is clamped to niters (do_min defaults
        # to True)
        self.blocks.append(make_loop(zero_i64, niters, tile_size))
    return result
def transform_TiledMap(self, expr):
    """
    Lower a TiledMap expression into explicit loops over tiles.

    The iteration count is the extent of the first argument along the first
    axis.  The tile size is either a constant from self.fixed_tile_sizes
    (when expr.fixed_tile_size is set) or an element of
    self.tile_sizes_param.  The nested function is transformed with
    self.transform_TypedFn (with preallocate_output = True unless it returns
    a scalar) and inlined into the body of each generated loop, receiving
    slices of the arguments plus a region of the output array.

    The output is self.output_var when one is set and the nested function
    does not return a scalar, otherwise a freshly created output array.

    Returns the output array.
    """
    args = expr.args
    axes = expr.axes

    # TODO: Should make sure that all the shapes conform here,
    # but we don't yet have anything like assertions or error handling
    niters = self.shape(expr.args[0], syntax_helpers.unwrap_constant(axes[0]))

    # Create the tile size variable and find the number of tiles
    if expr.fixed_tile_size:
        # Fixed tiles consume the next entry of self.fixed_tile_sizes
        self.fixed_idx += 1
        tile_size = syntax_helpers.const(self.fixed_tile_sizes[self.fixed_idx])
    else:
        # Otherwise the tile size is read from the tile sizes parameter at
        # the next nesting index, and the function is flagged as tiled
        self.tiling = True
        self.fn.has_tiles = True
        self.nesting_idx += 1
        tile_size = self.index(self.tile_sizes_param, self.nesting_idx,
                               temp = True, name = "tilesize")

    untiled_inner_fn = self.get_fn(expr.fn)
    if isinstance(untiled_inner_fn.return_type, ScalarT):
        tiled_inner_fn = self.transform_TypedFn(untiled_inner_fn)
    else:
        tiled_inner_fn = self.transform_TypedFn(untiled_inner_fn,
                                                preallocate_output = True)

    nested_has_tiles = tiled_inner_fn.has_tiles

    # Increase the nesting_idx by the number of tiles in the nested fn
    self.nesting_idx += tiled_inner_fn.num_tiles

    slice_t = array_type.make_slice_type(Int64, Int64, Int64)

    closure_args = [self.get_closure_arg(e)
                    for e in self.closure_elts(expr.fn)]

    if self.output_var and \
       not isinstance(untiled_inner_fn.return_type, ScalarT):
        array_result = self.output_var
    else:
        shape_args = closure_args + expr.args
        array_result = self._create_output_array(untiled_inner_fn, shape_args,
                                                 [], "array_result")

    # When an output variable is set, its type must be exactly ArrayT
    assert self.output_var is None or \
           self.output_var.type.__class__ is ArrayT, \
           "Invalid output var %s : %s" % \
           (self.output_var, self.output_var.type)

    # Builds one ForLoop from `start` to `stop` in increments of `step`.
    # Each iteration slices every argument along its axis with
    # Slice(i, i + step) and inlines the tiled nested function on those
    # slices plus a region of the output array.
    def make_loop(start, stop, step, do_min = True):
        i = self.fresh_var(niters.type, "i")

        self.blocks.push()
        slice_stop = self.add(i, step, "slice_stop")
        # The clamp is against niters, not against the `stop` argument
        slice_stop_min = self.min(slice_stop, niters, "slice_min") if do_min \
                         else slice_stop

        tile_bounds = syntax.Slice(i, slice_stop_min, one_i64, type = slice_t)
        nested_args = [self.index_along_axis(arg, axis, tile_bounds)
                       for arg, axis in zip(args, axes)]
        # NOTE(review): the axis used to slice the output comes from the
        # tile index counters (read when make_loop is called, i.e. after
        # nesting_idx was advanced above) -- confirm this is the intended
        # output axis.
        out_idx = self.fixed_idx if expr.fixed_tile_size else self.nesting_idx
        output_region = self.index_along_axis(array_result, out_idx, tile_bounds)
        # The output region is appended unconditionally, including when the
        # nested function returns a scalar and was transformed without
        # preallocate_output
        nested_args.append(output_region)

        if nested_has_tiles:
            # Nested tiled functions also receive the tile sizes parameter
            nested_args.append(self.tile_sizes_param)
        body = self.blocks.pop()
        # The nested call is inlined straight into the popped loop body;
        # no result variable is bound (result_var = None)
        do_inline(tiled_inner_fn,
                  closure_args + nested_args,
                  self.type_env,
                  body,
                  result_var = None)
        return syntax.ForLoop(i, start, stop, step, body, {})

    assert isinstance(tile_size, syntax.Expr)

    self.comment("TiledMap in %s" % self.fn.name)

    if expr.fixed_tile_size and \
       config.opt_reg_tiles_not_tile_size_dependent and \
       syntax_helpers.unwrap_constant(tile_size) > 1:
        # Two loops, neither clamping its slice bound: the first covers
        # [0, num_tiles * tile_size) in steps of tile_size, the second
        # starts from the first loop's variable and runs to niters in
        # steps of one.
        num_tiles = self.div(niters, tile_size, "num_tiles")
        tile_stop = self.mul(num_tiles, tile_size, "tile_stop")
        loop1 = make_loop(zero_i64, tile_stop, tile_size, False)
        self.blocks.append(loop1)
        loop2_start = self.assign_temp(loop1.var, "loop2_start")
        self.blocks.append(make_loop(loop2_start, niters, one_i64, False))
    else:
        # Single loop; the last tile is clamped to niters (do_min defaults
        # to True)
        self.blocks.append(make_loop(zero_i64, niters, tile_size))
    return array_result
def visit_Const(self, v):
    """
    Visit a constant node: return syntax_helpers.const applied to the value
    stored in `v.value`.
    """
    py_value = v.value
    return syntax_helpers.const(py_value)