def map_subscript(self, expr, enclosing_prec, type_context):
    """Render the subscript *expr* as a code string.

    When the subscripted array is a ``TemporaryVariable`` and the kernel
    reports a non-empty local size, the access is emitted as
    ``<array>[programIndex + <lsize*subscript>]``, with a vector access
    added if ``get_access_info`` reports a vector index. Every other case
    is delegated to the superclass implementation.
    """
    from loopy.kernel.data import TemporaryVariable

    def fall_back():
        return super(ExprToISPCMapper, self).map_subscript(
                expr, enclosing_prec, type_context)

    array = self.find_array(expr)
    if not isinstance(array, TemporaryVariable):
        return fall_back()

    _, local_sizes = self.kernel.get_grid_size_upper_bounds_as_exprs()
    if not local_sizes:
        return fall_back()

    # exactly one local axis is expected here; unpacking fails otherwise
    (local_size,) = local_sizes

    from loopy.kernel.array import get_access_info
    from pymbolic import evaluate

    def eval_with_substs(e):
        return evaluate(e, self.codegen_state.var_subst_map)

    info = get_access_info(
            self.kernel.target, array, expr.index,
            eval_with_substs,
            self.codegen_state.vectorization_info)

    (flat_subscript,) = info.subscripts
    offset_code = self.rec(local_size * flat_subscript, PREC_SUM, 'i')
    access_code = self.parenthesize_if_needed(
            "%s[programIndex + %s]" % (info.array_name, offset_code),
            enclosing_prec, PREC_CALL)

    if info.vector_index is None:
        return access_code

    return self.kernel.target.add_vector_access(
            access_code, info.vector_index)
def map_subscript(self, expr, type_context):
    """Map the subscript *expr* to a target expression.

    Accesses to ``TemporaryVariable`` arrays whose ``scope`` is
    ``temp_var_scope.PRIVATE`` are rewritten, when the kernel reports a
    non-empty local size, to ``array[programIndex + lsize*subscript]``.
    All other subscripts are delegated to the superclass implementation.
    """
    from loopy.kernel.data import TemporaryVariable

    def fall_back():
        return super(ExprToISPCExprMapper, self).map_subscript(
                expr, type_context)

    array = self.find_array(expr)

    is_private_temporary = (
            isinstance(array, TemporaryVariable)
            and array.scope == temp_var_scope.PRIVATE)
    if not is_private_temporary:
        return fall_back()

    # generate access code for access to private-index temporaries

    _, local_sizes = self.kernel.get_grid_size_upper_bounds_as_exprs()
    if not local_sizes:
        return fall_back()

    # exactly one local axis is expected here; unpacking fails otherwise
    (local_size,) = local_sizes

    from loopy.kernel.array import get_access_info
    from pymbolic import evaluate

    def eval_with_substs(e):
        return evaluate(e, self.codegen_state.var_subst_map)

    info = get_access_info(
            self.kernel.target, array, expr.index,
            eval_with_substs,
            self.codegen_state.vectorization_info)

    (flat_subscript,) = info.subscripts
    base = var(info.array_name)
    access = base[
            var("programIndex") + self.rec(local_size * flat_subscript, 'i')]

    if info.vector_index is None:
        return access

    return self.kernel.target.add_vector_access(access, info.vector_index)
def map_subscript(self, expr, type_context):
    """Map the subscript *expr* to a target expression.

    For a ``TemporaryVariable`` whose ``address_space`` is
    ``AddressSpace.PRIVATE``, and provided the kernel reports a non-empty
    local size, the result is ``array[programIndex + lsize*subscript]``
    (wrapped in a vector access if ``get_access_info`` reports a vector
    index). Anything else goes to the superclass implementation.
    """
    from loopy.kernel.data import TemporaryVariable

    array = self.find_array(expr)

    if (isinstance(array, TemporaryVariable)
            and array.address_space == AddressSpace.PRIVATE):
        # generate access code for access to private-index temporaries

        grid_bounds = self.kernel.get_grid_size_upper_bounds_as_exprs()
        _, local_sizes = grid_bounds

        if local_sizes:
            # exactly one local axis is expected; unpacking fails otherwise
            (local_size,) = local_sizes

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate

            info = get_access_info(
                    self.kernel.target, array, expr.index,
                    lambda e: evaluate(
                        e, self.codegen_state.var_subst_map),
                    self.codegen_state.vectorization_info)

            (flat_subscript,) = info.subscripts
            lane_index = var("programIndex")
            scaled_offset = self.rec(local_size*flat_subscript, 'i')
            access = var(info.array_name)[lane_index + scaled_offset]

            if info.vector_index is None:
                return access
            return self.kernel.target.add_vector_access(
                    access, info.vector_index)

    return super(ExprToISPCExprMapper, self).map_subscript(
            expr, type_context)
def map_subscript(self, expr, enclosing_prec, type_context):
    """Produce the code string for the subscript *expr*.

    ``TemporaryVariable`` accesses are emitted as
    ``<array>[programIndex + <lsize*subscript>]`` when the kernel reports
    a non-empty local size; a vector access is added if
    ``get_access_info`` reports a vector index. All remaining cases are
    forwarded to the superclass implementation.
    """
    from loopy.kernel.data import TemporaryVariable

    target_array = self.find_array(expr)

    if isinstance(target_array, TemporaryVariable):
        local_sizes = self.kernel.get_grid_size_upper_bounds_as_exprs()[1]

        if local_sizes:
            # exactly one local axis is expected; unpacking fails otherwise
            (local_size,) = local_sizes

            from loopy.kernel.array import get_access_info
            from pymbolic import evaluate

            info = get_access_info(
                    self.kernel.target, target_array, expr.index,
                    lambda e: evaluate(e, self.codegen_state.var_subst_map),
                    self.codegen_state.vectorization_info)

            (flat_subscript,) = info.subscripts

            unparenthesized = "%s[programIndex + %s]" % (
                    info.array_name,
                    self.rec(local_size*flat_subscript, PREC_SUM, 'i'))
            code = self.parenthesize_if_needed(
                    unparenthesized, enclosing_prec, PREC_CALL)

            if info.vector_index is not None:
                code = self.kernel.target.add_vector_access(
                        code, info.vector_index)
            return code

    return super(ExprToISPCMapper, self).map_subscript(
            expr, enclosing_prec, type_context)
def emit_call(self, expression_to_code_mapper, expression, target):
    """Return the index expression for an ``indexof``/``indexof_vec`` call.

    The call must have exactly one argument, and that argument must be a
    ``Subscript``. For ``indexof`` the first subscript computed by
    ``get_access_info`` is returned. For ``indexof_vec`` that subscript is
    multiplied by the extent of the array's vector axis and the vector
    index is added; if the array has no vector axis the plain subscript
    is returned.

    Raises ``LoopyError`` for a wrong argument count, a non-subscript
    argument, or an ``ImageArg`` array, and ``RuntimeError`` if
    ``self.name`` is neither of the two supported names.
    """
    from pymbolic.primitives import Subscript

    call_args = expression.parameters
    if len(call_args) != 1:
        raise LoopyError("%s takes exactly one argument" % self.name)

    (subscript_arg,) = call_args
    if not isinstance(subscript_arg, Subscript):
        raise LoopyError("argument to %s must be a subscript" % self.name)

    ecm = expression_to_code_mapper
    array = ecm.find_array(subscript_arg)

    from loopy.kernel.array import get_access_info
    from pymbolic import evaluate

    def eval_with_substs(e):
        return evaluate(e, ecm.codegen_state.var_subst_map)

    info = get_access_info(
            ecm.kernel.target, array, subscript_arg.index,
            eval_with_substs,
            ecm.codegen_state.vectorization_info)

    from loopy.kernel.data import ImageArg
    if isinstance(array, ImageArg):
        raise LoopyError("%s does not support images" % self.name)

    if self.name == "indexof":
        return info.subscripts[0]

    if self.name != "indexof_vec":
        raise RuntimeError("should not get here")

    from loopy.kernel.array import VectorArrayDimTag

    vector_axes = [
            axis_nr
            for axis_nr, dim_tag in enumerate(array.dim_tags)
            if isinstance(dim_tag, VectorArrayDimTag)]

    if not vector_axes:
        return info.subscripts[0]

    # if several axes carry the tag, the last one is used
    vec_axis = vector_axes[-1]
    return info.subscripts[0] * array.shape[vec_axis] + info.vector_index
def map_call(self, expr, enclosing_prec, type_context):
    """Generate code for the function call *expr*.

    ``indexof`` and ``indexof_vec`` are handled inline: they require a
    single ``Subscript`` argument and return the subscript expression
    computed by ``get_access_info`` (for ``indexof_vec``, scaled by the
    extent of the vector axis plus the vector index when the array has a
    vector axis).

    Any other function is resolved through ``self.kernel.mangle_function``,
    recorded in ``self.codegen_state.seen_functions`` and returned as the
    string ``target_name(arg, ...)``.

    Raises ``LoopyError`` for malformed ``indexof``/``indexof_vec`` calls
    and for functions that do not have exactly one return value, and
    ``RuntimeError`` if the mangler does not know the function.
    """
    from pymbolic.primitives import Variable, Subscript
    from pymbolic.mapper.stringifier import PREC_NONE

    func = expr.function

    # {{{ implement indexof, indexof_vec

    if func.name in ["indexof", "indexof_vec"]:
        builtin_name = func.name

        if len(expr.parameters) != 1:
            raise LoopyError("%s takes exactly one argument" % builtin_name)

        (subscript_arg,) = expr.parameters
        if not isinstance(subscript_arg, Subscript):
            raise LoopyError(
                    "argument to %s must be a subscript" % builtin_name)

        array = self.find_array(subscript_arg)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        info = get_access_info(
                self.kernel.target, array, subscript_arg.index,
                lambda e: evaluate(e, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

        from loopy.kernel.data import ImageArg
        if isinstance(array, ImageArg):
            raise LoopyError("%s does not support images" % builtin_name)

        if builtin_name == "indexof":
            return info.subscripts[0]

        if builtin_name != "indexof_vec":
            raise RuntimeError("should not get here")

        from loopy.kernel.array import VectorArrayDimTag

        vector_axes = [
                axis_nr
                for axis_nr, dim_tag in enumerate(array.dim_tags)
                if isinstance(dim_tag, VectorArrayDimTag)]

        if not vector_axes:
            return info.subscripts[0]

        # if several axes carry the tag, the last one is used
        vec_axis = vector_axes[-1]
        return (info.subscripts[0] * array.shape[vec_axis]
                + info.vector_index)

    # }}}

    identifier = func.name if isinstance(func, Variable) else func

    par_dtypes = tuple(self.infer_type(par) for par in expr.parameters)

    mangle_result = self.kernel.mangle_function(
            identifier, par_dtypes,
            ast_builder=self.codegen_state.ast_builder)

    if mangle_result is None:
        raise RuntimeError(
                "function '%s' unknown--"
                "maybe you need to register a function mangler?"
                % identifier)

    if len(mangle_result.result_dtypes) != 1:
        raise LoopyError(
                "functions with more or fewer than one return value "
                "may not be used in an expression")

    if mangle_result.arg_dtypes is None:
        # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
        # propagate the type context here. But for many others, it does
        # not. Using the inferred type as a stopgap for now.
        str_parameters = []
        for par, par_dtype in zip(expr.parameters, par_dtypes):
            par_context = dtype_to_type_context(
                    self.kernel.target, par_dtype)
            str_parameters.append(
                    self.rec(par, PREC_NONE, type_context=par_context))

        from warnings import warn
        warn(
                "Calling function '%s' with unknown C signature--"
                "return CallMangleInfo.arg_dtypes" % identifier,
                LoopyWarning)
    else:
        str_parameters = []
        for par, tgt_dtype in zip(expr.parameters, mangle_result.arg_dtypes):
            tgt_context = dtype_to_type_context(
                    self.kernel.target, tgt_dtype)
            str_parameters.append(
                    self.rec(par, PREC_NONE, tgt_context, tgt_dtype))

    from loopy.codegen import SeenFunction
    seen = SeenFunction(
            identifier,
            mangle_result.target_name,
            mangle_result.arg_dtypes or par_dtypes)
    self.codegen_state.seen_functions.add(seen)

    return "%s(%s)" % (mangle_result.target_name, ", ".join(str_parameters))
def map_subscript(self, expr, enclosing_prec, type_context):
    """Generate the code string for the subscript *expr*.

    * If the aggregate is not a plain ``Variable``, a generic
      ``aggregate[index]`` string is produced.
    * ``ImageArg`` arrays are read through ``read_imagef``; only
      ``float32``, vector and ``float64`` dtypes are supported, anything
      else raises ``NotImplementedError``.
    * ``GlobalArg`` and ``TemporaryVariable`` arrays are accessed by the
      flattened subscript from ``get_access_info`` (or dereferenced /
      named directly when there is no subscript), with a vector access
      added when a vector index is reported.
    """
    from pymbolic.primitives import Variable

    if not isinstance(expr.aggregate, Variable):
        # generic case: nothing is known about the aggregate
        return self.parenthesize_if_needed(
                "%s[%s]" % (
                    self.rec(expr.aggregate, PREC_CALL, type_context),
                    self.rec(expr.index, PREC_NONE, 'i')),
                enclosing_prec, PREC_CALL)

    array = self.find_array(expr)

    from loopy.kernel.array import get_access_info
    from pymbolic import evaluate
    from loopy.symbolic import simplify_using_aff

    simplified_indices = tuple(
            simplify_using_aff(self.kernel, idx)
            for idx in expr.index_tuple)

    info = get_access_info(
            self.kernel.target, array, simplified_indices,
            lambda e: evaluate(e, self.codegen_state.var_subst_map),
            self.codegen_state.vectorization_info)

    from loopy.kernel.data import ImageArg, GlobalArg, TemporaryVariable

    if isinstance(array, ImageArg):
        image_read = (
                "read_imagef(%s, loopy_sampler, (float%d)(%s))"
                % (array.name, array.dimensions,
                    ", ".join(
                        self.rec(idx, PREC_NONE, 'i')
                        for idx in expr.index[::-1])))

        if array.dtype.numpy_dtype == np.float32:
            return image_read + ".x"

        if self.kernel.target.is_vector_dtype(array.dtype):
            return image_read
        if array.dtype.numpy_dtype == np.float64:
            return "as_double(%s.xy)" % image_read

        raise NotImplementedError(
                "non-floating-point images not supported for now")

    elif isinstance(array, (GlobalArg, TemporaryVariable)):
        if info.subscripts:
            (flat_subscript,) = info.subscripts
            code = self.parenthesize_if_needed(
                    "%s[%s]" % (
                        info.array_name,
                        self.rec(flat_subscript, PREC_NONE, 'i')),
                    enclosing_prec, PREC_CALL)
        elif isinstance(array, GlobalArg):
            # unsubscripted global args are pointers
            code = "*" + info.array_name
        else:
            # unsubscripted temp vars are scalars
            code = info.array_name

        if info.vector_index is None:
            return code

        return self.codegen_state.ast_builder.add_vector_access(
                code, info.vector_index)

    else:
        assert False
def emit_assignment(self, codegen_state, insn):
    """Build the statement implementing the assignment instruction *insn*.

    Ordinary assignments become a ``cgen.Assign`` of the generated
    right-hand-side code to the generated left-hand side.

    Instructions tagged ``"!streaming_store"`` become a
    ``streaming_store(base + offset, value)`` statement instead. For
    those, the flattened subscript must contain the iname tagged with
    local axis 0 exactly once as a bare term and nowhere else; that term
    is dropped from the offset. A right-hand side that does not depend on
    the local-axis-0 iname is wrapped in ``broadcast(..., 0)``.

    :arg codegen_state: supplies ``kernel``, ``expression_to_code_mapper``,
        ``var_subst_map`` and ``vectorization_info``.
    :arg insn: the instruction; must have exactly one assignee variable.

    :raises NotImplementedError: if ``insn.atomicity`` is set.
    :raises LoopyError: if a streaming store targets an unsupported array
        type, does not have exactly one flattened subscript, does not
        have stride 1 in the local index, or uses a short-vector data
        type.
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    assignee_var_name, = insn.assignee_var_names()

    lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    if insn.atomicity:
        raise NotImplementedError("atomic ops in ISPC")

    from loopy.expression import dtype_to_type_context
    from pymbolic.mapper.stringifier import PREC_NONE

    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
    rhs_code = ecm(insn.expression, prec=PREC_NONE,
            type_context=rhs_type_context,
            needed_dtype=lhs_dtype)

    lhs = insn.assignee

    # {{{ handle streaming stores

    if "!streaming_store" in insn.tags:
        ary = ecm.find_array(lhs)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        from loopy.symbolic import simplify_using_aff
        index_tuple = tuple(
                simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)

        # Use the codegen_state passed to this method, not an attribute
        # of self, for the variable substitution map.
        access_info = get_access_info(kernel.target, ary, index_tuple,
                lambda expr: evaluate(expr, codegen_state.var_subst_map),
                codegen_state.vectorization_info)

        from loopy.kernel.data import GlobalArg, TemporaryVariable

        if not isinstance(ary, (GlobalArg, TemporaryVariable)):
            raise LoopyError("array type not supported in ISPC: %s"
                    % type(ary).__name__)

        if len(access_info.subscripts) != 1:
            raise LoopyError("streaming stores must have a subscript")
        subscript, = access_info.subscripts

        from pymbolic.primitives import Sum, flattened_sum, Variable
        if isinstance(subscript, Sum):
            terms = subscript.children
        else:
            # A subscript that is not a sum is a single term in its own
            # right; it must not be taken apart via '.children'.
            terms = (subscript,)

        new_terms = []

        from loopy.kernel.data import LocalIndexTag
        from loopy.symbolic import get_dependencies

        def is_local_axis_0(name):
            tag = kernel.iname_to_tag.get(name)
            return isinstance(tag, LocalIndexTag) and tag.axis == 0

        def stride_error():
            return LoopyError(
                    "streaming store must have stride 1 "
                    "in local index, got: %s" % subscript)

        saw_l0 = False
        for term in terms:
            if isinstance(term, Variable) and is_local_axis_0(term.name):
                # the bare local-axis-0 iname: allowed exactly once,
                # and not part of the emitted offset
                if saw_l0:
                    raise stride_error()
                saw_l0 = True
                continue

            # every other term must be independent of local axis 0
            for dep in get_dependencies(term):
                if is_local_axis_0(dep):
                    raise stride_error()

            new_terms.append(term)

        if not saw_l0:
            raise stride_error()

        if access_info.vector_index is not None:
            raise LoopyError("streaming store may not use a short-vector "
                    "data type")

        rhs_has_programindex = any(
                is_local_axis_0(dep)
                for dep in get_dependencies(insn.expression))

        if not rhs_has_programindex:
            rhs_code = "broadcast(%s, 0)" % rhs_code

        from cgen import Statement
        return Statement(
                "streaming_store(%s + %s, %s)"
                % (
                    access_info.array_name,
                    ecm(flattened_sum(new_terms), PREC_NONE, 'i'),
                    rhs_code))

    # }}}

    from cgen import Assign
    return Assign(ecm(lhs, prec=PREC_NONE, type_context=None), rhs_code)
def map_subscript(self, expr, type_context):
    """Map the subscript *expr* to a target expression.

    * If the aggregate is not a plain ``Variable``, aggregate and index
      are mapped recursively and re-subscripted.
    * ``ImageArg`` arrays become a ``read_imagef`` call whose coordinate
      vector is the reversed index tuple padded with zeros; one to three
      target axes and ``float32``, vector or ``float64`` dtypes are
      supported.
    * ``ArrayArg``, ``TemporaryVariable`` and ``ConstantArg`` arrays are
      accessed through the flattened subscript from ``get_access_info``,
      with a vector access added when a vector index is reported.

    Raises ``LoopyError`` for an unsupported number of image axes and
    ``NotImplementedError`` for unsupported image dtypes.
    """
    from pymbolic.primitives import Variable

    aggregate = expr.aggregate

    if not isinstance(aggregate, Variable):
        # generic case: nothing is known about the aggregate
        return self.rec(aggregate, type_context)[self.rec(expr.index, 'i')]

    def make_var(name):
        # keep the tag of a tagged aggregate on the generated variable
        from loopy import TaggedVariable
        if isinstance(expr.aggregate, TaggedVariable):
            return TaggedVariable(name, expr.aggregate.tag)
        return var(name)

    array = self.find_array(expr)

    from loopy.kernel.array import get_access_info
    from pymbolic import evaluate
    from loopy.symbolic import simplify_using_aff

    simplified_indices = tuple(
            simplify_using_aff(self.kernel, idx)
            for idx in expr.index_tuple)

    info = get_access_info(
            self.kernel.target, array, simplified_indices,
            lambda e: evaluate(e, self.codegen_state.var_subst_map),
            self.codegen_state.vectorization_info)

    from loopy.kernel.data import (
            ImageArg, ArrayArg, TemporaryVariable, ConstantArg)

    if isinstance(array, ImageArg):
        num_target_axes = array.num_target_axes()

        if num_target_axes in [1, 2]:
            idx_vec_type = "float2"
            padding = 2 - num_target_axes
        elif num_target_axes == 3:
            idx_vec_type = "float4"
            padding = 4 - num_target_axes
        else:
            raise LoopyError(
                    "unsupported number (%d) of target axes in image"
                    % num_target_axes)

        coordinates = expr.index_tuple[::-1] + (0,) * padding
        coordinate_vector = var("(%s)" % idx_vec_type)(
                *self.rec(coordinates, 'i'))

        image_read = var("read_imagef")(
                var(array.name), var("loopy_sampler"), coordinate_vector)

        if array.dtype.numpy_dtype == np.float32:
            return image_read.attr("x")

        if self.kernel.target.is_vector_dtype(array.dtype):
            return image_read
        if array.dtype.numpy_dtype == np.float64:
            return var("as_double")(image_read.attr("xy"))

        raise NotImplementedError(
                "non-floating-point images not supported for now")

    elif isinstance(array, (ArrayArg, TemporaryVariable, ConstantArg)):
        if info.subscripts:
            (flat_subscript,) = info.subscripts
            access = make_var(info.array_name)[
                    simplify_using_aff(
                        self.kernel, self.rec(flat_subscript, 'i'))]
        else:
            is_pointer = (
                    isinstance(array, (ConstantArg, ArrayArg))
                    or (isinstance(array, TemporaryVariable)
                        and array.base_storage))
            if is_pointer:
                # unsubscripted global args are pointers
                access = make_var(info.array_name)[0]
            else:
                # unsubscripted temp vars are scalars
                # (unless they use base_storage)
                access = make_var(info.array_name)

        if info.vector_index is None:
            return access

        return self.codegen_state.ast_builder.add_vector_access(
                access, info.vector_index)

    else:
        assert False
def map_call(self, expr, enclosing_prec, type_context):
    """Generate code for the function call *expr*.

    The built-ins ``indexof`` and ``indexof_vec`` take one ``Subscript``
    argument and yield the subscript expression computed by
    ``get_access_info``; for ``indexof_vec`` on an array with a vector
    axis, that is multiplied by the axis extent and the vector index is
    added.

    Other functions are looked up with ``self.kernel.mangle_function``,
    added to ``self.codegen_state.seen_functions``, and emitted as the
    string ``target_name(arg, ...)``. A ``LoopyWarning`` is issued when
    the mangler gives no argument dtypes.

    Raises ``LoopyError`` for malformed built-in calls or functions
    without exactly one return value; raises ``RuntimeError`` for
    functions unknown to the mangler.
    """
    from pymbolic.primitives import Variable, Subscript
    from pymbolic.mapper.stringifier import PREC_NONE

    callee = expr.function
    call_args = expr.parameters

    # {{{ implement indexof, indexof_vec

    if callee.name in ["indexof", "indexof_vec"]:
        if len(call_args) != 1:
            raise LoopyError("%s takes exactly one argument" % callee.name)

        (subscript_arg,) = call_args
        if not isinstance(subscript_arg, Subscript):
            raise LoopyError(
                    "argument to %s must be a subscript" % callee.name)

        array = self.find_array(subscript_arg)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        def eval_with_substs(e):
            return evaluate(e, self.codegen_state.var_subst_map)

        info = get_access_info(
                self.kernel.target, array, subscript_arg.index,
                eval_with_substs,
                self.codegen_state.vectorization_info)

        from loopy.kernel.data import ImageArg
        if isinstance(array, ImageArg):
            raise LoopyError("%s does not support images" % callee.name)

        flat_index = info.subscripts

        if callee.name == "indexof":
            return flat_index[0]

        elif callee.name == "indexof_vec":
            from loopy.kernel.array import VectorArrayDimTag

            # the last axis carrying the vector tag wins
            vec_axis = None
            for axis_nr, dim_tag in enumerate(array.dim_tags):
                if isinstance(dim_tag, VectorArrayDimTag):
                    vec_axis = axis_nr

            if vec_axis is not None:
                return (
                        flat_index[0]*array.shape[vec_axis]
                        + info.vector_index)
            return flat_index[0]

        else:
            raise RuntimeError("should not get here")

    # }}}

    identifier = callee
    if isinstance(identifier, Variable):
        identifier = identifier.name

    par_dtypes = tuple(self.infer_type(par) for par in call_args)

    mangle_result = self.kernel.mangle_function(
            identifier, par_dtypes,
            ast_builder=self.codegen_state.ast_builder)

    if mangle_result is None:
        raise RuntimeError("function '%s' unknown--"
                "maybe you need to register a function mangler?"
                % identifier)

    if len(mangle_result.result_dtypes) != 1:
        raise LoopyError("functions with more or fewer than one return value "
                "may not be used in an expression")

    target = self.kernel.target
    arg_dtypes = mangle_result.arg_dtypes

    if arg_dtypes is not None:
        str_parameters = [
                self.rec(
                    par, PREC_NONE,
                    dtype_to_type_context(target, tgt_dtype),
                    tgt_dtype)
                for par, tgt_dtype in zip(call_args, arg_dtypes)]
    else:
        # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
        # propagate the type context here. But for many others, it does
        # not. Using the inferred type as a stopgap for now.
        str_parameters = [
                self.rec(
                    par, PREC_NONE,
                    type_context=dtype_to_type_context(target, par_dtype))
                for par, par_dtype in zip(call_args, par_dtypes)]

        from warnings import warn
        warn("Calling function '%s' with unknown C signature--"
                "return CallMangleInfo.arg_dtypes"
                % identifier, LoopyWarning)

    from loopy.codegen import SeenFunction
    self.codegen_state.seen_functions.add(
            SeenFunction(
                identifier,
                mangle_result.target_name,
                mangle_result.arg_dtypes or par_dtypes))

    joined_args = ", ".join(str_parameters)
    return "%s(%s)" % (mangle_result.target_name, joined_args)
def map_subscript(self, expr, enclosing_prec, type_context):
    """Produce the code string for the subscript *expr*.

    A subscript whose aggregate is not a ``Variable`` is rendered
    generically as ``aggregate[index]``. Otherwise the array is looked up
    and handled by kind: ``ImageArg`` arrays are read with
    ``read_imagef`` (``float32``, vector and ``float64`` dtypes only;
    other dtypes raise ``NotImplementedError``), while ``GlobalArg`` and
    ``TemporaryVariable`` arrays use the flattened subscript from
    ``get_access_info`` plus a vector access when a vector index is
    reported.
    """
    def generic_subscript():
        aggregate_code = self.rec(expr.aggregate, PREC_CALL, type_context)
        index_code = self.rec(expr.index, PREC_NONE, 'i')
        return self.parenthesize_if_needed(
                "%s[%s]" % (aggregate_code, index_code),
                enclosing_prec, PREC_CALL)

    from pymbolic.primitives import Variable
    if not isinstance(expr.aggregate, Variable):
        return generic_subscript()

    array = self.find_array(expr)

    from loopy.kernel.array import get_access_info
    from pymbolic import evaluate
    from loopy.symbolic import simplify_using_aff

    simplified_indices = tuple(
            simplify_using_aff(self.kernel, idx)
            for idx in expr.index_tuple)

    def eval_with_substs(e):
        return evaluate(e, self.codegen_state.var_subst_map)

    info = get_access_info(
            self.kernel.target, array, simplified_indices,
            eval_with_substs,
            self.codegen_state.vectorization_info)

    from loopy.kernel.data import ImageArg, GlobalArg, TemporaryVariable

    if isinstance(array, ImageArg):
        image_read = (
                "read_imagef(%s, loopy_sampler, (float%d)(%s))"
                % (array.name, array.dimensions,
                    ", ".join(
                        self.rec(idx, PREC_NONE, 'i')
                        for idx in expr.index[::-1])))

        if array.dtype.numpy_dtype == np.float32:
            return image_read+".x"

        if self.kernel.target.is_vector_dtype(array.dtype):
            return image_read
        elif array.dtype.numpy_dtype == np.float64:
            return "as_double(%s.xy)" % image_read
        else:
            raise NotImplementedError(
                    "non-floating-point images not supported for now")

    elif isinstance(array, (GlobalArg, TemporaryVariable)):
        if len(info.subscripts) != 0:
            (flat_subscript,) = info.subscripts
            subscript_code = self.rec(flat_subscript, PREC_NONE, 'i')
            code = self.parenthesize_if_needed(
                    "%s[%s]" % (info.array_name, subscript_code),
                    enclosing_prec, PREC_CALL)
        elif isinstance(array, GlobalArg):
            # unsubscripted global args are pointers
            code = "*" + info.array_name
        else:
            # unsubscripted temp vars are scalars
            code = info.array_name

        if info.vector_index is not None:
            code = self.codegen_state.ast_builder.add_vector_access(
                    code, info.vector_index)
        return code

    else:
        assert False
def map_call(self, expr, enclosing_prec, type_context):
    """Generate code for the function call *expr*.

    ``indexof`` and ``indexof_vec`` take one ``Subscript`` argument and
    return the subscript expression computed by ``get_access_info`` (for
    ``indexof_vec`` on an array with a vector axis: multiplied by the
    axis extent, plus the vector index).

    For other functions, ``self.kernel.mangle_function`` may return a
    2-tuple ``(result_dtype, c_name)`` or a 3-tuple that additionally
    carries the target argument dtypes. The function is recorded in
    ``self.codegen_state.seen_functions`` and emitted as the string
    ``c_name(arg, ...)``. Without a mangler result, the name of a
    ``Variable`` callee is used as the C name.

    Raises ``LoopyError`` for malformed built-in calls and
    ``RuntimeError`` if the mangler result has an unexpected length or no
    C name can be determined.
    """
    from pymbolic.primitives import Variable, Subscript
    from pymbolic.mapper.stringifier import PREC_NONE

    identifier = expr.function
    call_args = expr.parameters

    # {{{ implement indexof, indexof_vec

    if identifier.name in ["indexof", "indexof_vec"]:
        builtin_name = identifier.name

        if len(call_args) != 1:
            raise LoopyError("%s takes exactly one argument" % builtin_name)

        (subscript_arg,) = call_args
        if not isinstance(subscript_arg, Subscript):
            raise LoopyError(
                    "argument to %s must be a subscript" % builtin_name)

        array = self.find_array(subscript_arg)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        info = get_access_info(
                self.kernel.target, array, subscript_arg.index,
                lambda e: evaluate(e, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

        from loopy.kernel.data import ImageArg
        if isinstance(array, ImageArg):
            raise LoopyError("%s does not support images" % builtin_name)

        if builtin_name == "indexof":
            return info.subscripts[0]

        if builtin_name != "indexof_vec":
            raise RuntimeError("should not get here")

        from loopy.kernel.array import VectorArrayDimTag

        vector_axes = [
                axis_nr
                for axis_nr, dim_tag in enumerate(array.dim_tags)
                if isinstance(dim_tag, VectorArrayDimTag)]

        if not vector_axes:
            return info.subscripts[0]

        # if several axes carry the tag, the last one is used
        vec_axis = vector_axes[-1]
        return info.subscripts[0] * array.shape[vec_axis] + info.vector_index

    # }}}

    c_name = None
    if isinstance(identifier, Variable):
        identifier = identifier.name
        c_name = identifier

    par_dtypes = tuple(self.infer_type(par) for par in call_args)

    str_parameters = None
    target = self.kernel.target

    mangle_result = self.kernel.mangle_function(identifier, par_dtypes)
    if mangle_result is not None:
        result_length = len(mangle_result)

        if result_length == 2:
            result_dtype, c_name = mangle_result

        elif result_length == 3:
            result_dtype, c_name, arg_tgt_dtypes = mangle_result

            str_parameters = [
                    self.rec(
                        par, PREC_NONE,
                        dtype_to_type_context(target, tgt_dtype),
                        tgt_dtype)
                    for par, tgt_dtype in zip(call_args, arg_tgt_dtypes)]

        else:
            raise RuntimeError(
                    "result of function mangler "
                    "for function '%s' not understood" % identifier)

    from loopy.codegen import SeenFunction
    self.codegen_state.seen_functions.add(
            SeenFunction(identifier, c_name, par_dtypes))

    if str_parameters is None:
        # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
        # propagate the type context here. But for many others, it does
        # not. Using the inferred type as a stopgap for now.
        str_parameters = []
        for par, par_dtype in zip(call_args, par_dtypes):
            par_context = dtype_to_type_context(target, par_dtype)
            str_parameters.append(
                    self.rec(par, PREC_NONE, type_context=par_context))

    if c_name is None:
        raise RuntimeError(
                "unable to find C name for function identifier '%s'"
                % identifier)

    return "%s(%s)" % (c_name, ", ".join(str_parameters))
def emit_assignment(self, codegen_state, insn):
    """Build the statement implementing the assignment instruction *insn*.

    Ordinary assignments become a ``cgen.Assign`` of the generated
    right-hand-side code to the generated left-hand side.

    Instructions tagged ``"!streaming_store"`` become a
    ``streaming_store(base + offset, value)`` statement instead. For
    those, the flattened subscript must contain the iname tagged with
    local axis 0 exactly once as a bare term and nowhere else; that term
    is dropped from the offset. A right-hand side that does not depend on
    the local-axis-0 iname is wrapped in ``broadcast(..., 0)``.

    :arg codegen_state: supplies ``kernel``, ``expression_to_code_mapper``,
        ``var_subst_map`` and ``vectorization_info``.
    :arg insn: the instruction; must have exactly one assignee variable.

    :raises NotImplementedError: if ``insn.atomicity`` is set.
    :raises LoopyError: if a streaming store targets an unsupported array
        type, does not have exactly one flattened subscript, does not
        have stride 1 in the local index, or uses a short-vector data
        type.
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    assignee_var_name, = insn.assignee_var_names()

    lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    if insn.atomicity:
        raise NotImplementedError("atomic ops in ISPC")

    from loopy.expression import dtype_to_type_context
    from pymbolic.mapper.stringifier import PREC_NONE

    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
    rhs_code = ecm(insn.expression, prec=PREC_NONE,
            type_context=rhs_type_context,
            needed_dtype=lhs_dtype)

    lhs = insn.assignee

    # {{{ handle streaming stores

    if "!streaming_store" in insn.tags:
        ary = ecm.find_array(lhs)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        from loopy.symbolic import simplify_using_aff
        index_tuple = tuple(
                simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)

        access_info = get_access_info(kernel.target, ary, index_tuple,
                lambda expr: evaluate(expr, codegen_state.var_subst_map),
                codegen_state.vectorization_info)

        from loopy.kernel.data import ArrayArg, TemporaryVariable

        if not isinstance(ary, (ArrayArg, TemporaryVariable)):
            raise LoopyError("array type not supported in ISPC: %s"
                    % type(ary).__name__)

        if len(access_info.subscripts) != 1:
            raise LoopyError("streaming stores must have a subscript")
        subscript, = access_info.subscripts

        from pymbolic.primitives import Sum, flattened_sum, Variable
        if isinstance(subscript, Sum):
            terms = subscript.children
        else:
            # A subscript that is not a sum is a single term in its own
            # right; it must not be taken apart via '.children'.
            terms = (subscript,)

        new_terms = []

        from loopy.kernel.data import LocalIndexTag, filter_iname_tags_by_type
        from loopy.symbolic import get_dependencies

        def stride_error():
            return LoopyError(
                    "streaming store must have stride 1 in "
                    "local index, got: %s" % subscript)

        saw_l0 = False
        for term in terms:
            if (isinstance(term, Variable)
                    and kernel.iname_tags_of_type(term.name, LocalIndexTag)):
                tag, = kernel.iname_tags_of_type(
                        term.name, LocalIndexTag, min_num=1, max_num=1)
                if tag.axis == 0:
                    # the bare local-axis-0 iname: allowed exactly once,
                    # and not part of the emitted offset
                    if saw_l0:
                        raise stride_error()
                    saw_l0 = True
                    continue
            else:
                # every other term must be independent of local axis 0
                for dep in get_dependencies(term):
                    dep_local_tags = filter_iname_tags_by_type(
                            kernel.iname_to_tags.get(dep, []), LocalIndexTag)
                    if dep_local_tags:
                        tag, = filter_iname_tags_by_type(
                                kernel.iname_to_tags.get(dep, []),
                                LocalIndexTag, 1)
                        if tag.axis == 0:
                            raise stride_error()

            # everything except the bare local-axis-0 iname stays in the
            # offset
            new_terms.append(term)

        if not saw_l0:
            raise stride_error()

        if access_info.vector_index is not None:
            raise LoopyError("streaming store may not use a short-vector "
                    "data type")

        # The outer loop must bind 'dep' before the inner loop reads it.
        rhs_has_programindex = any(
                isinstance(tag, LocalIndexTag) and tag.axis == 0
                for dep in get_dependencies(insn.expression)
                for tag in kernel.iname_tags(dep))

        if not rhs_has_programindex:
            rhs_code = "broadcast(%s, 0)" % rhs_code

        from cgen import Statement
        return Statement(
                "streaming_store(%s + %s, %s)"
                % (
                    access_info.array_name,
                    ecm(flattened_sum(new_terms), PREC_NONE, 'i'),
                    rhs_code))

    # }}}

    from cgen import Assign
    return Assign(ecm(lhs, prec=PREC_NONE, type_context=None), rhs_code)
def map_subscript(self, expr, enclosing_prec, type_context):
    """Generate the code string for the subscript *expr*.

    A subscript whose aggregate is not a ``Variable`` is rendered
    generically as ``aggregate[index]``. Otherwise the name is looked up
    among the kernel's arguments and temporary variables and must refer
    to an ``ArrayBase``. ``ImageArg`` arrays are read with
    ``read_imagef``; ``GlobalArg`` and ``TemporaryVariable`` arrays are
    accessed by their flattened subscript, with the vector member name
    from ``get_opencl_vec_member`` appended where one applies.

    Raises ``RuntimeError`` for unknown or non-array variables and
    ``NotImplementedError`` for unsupported image dtypes.
    """
    from pymbolic.primitives import Variable

    if not isinstance(expr.aggregate, Variable):
        # generic case: nothing is known about the aggregate
        return self.parenthesize_if_needed(
                "%s[%s]" % (
                    self.rec(expr.aggregate, PREC_CALL, type_context),
                    self.rec(expr.index, PREC_NONE, 'i')),
                enclosing_prec, PREC_CALL)

    array_name = expr.aggregate.name

    if array_name in self.kernel.arg_dict:
        array = self.kernel.arg_dict[array_name]
    elif array_name in self.kernel.temporary_variables:
        array = self.kernel.temporary_variables[array_name]
    else:
        raise RuntimeError("nothing known about subscripted variable '%s'"
                % array_name)

    from loopy.kernel.array import ArrayBase
    if not isinstance(array, ArrayBase):
        raise RuntimeError("subscripted variable '%s' is not an array"
                % array_name)

    from loopy.kernel.array import get_access_info
    from pymbolic import evaluate

    info = get_access_info(
            self.kernel.target, array, expr.index,
            lambda e: evaluate(e, self.codegen_state.var_subst_map),
            self.codegen_state.vectorization_info)

    vec_member = get_opencl_vec_member(info.vector_index)

    from loopy.kernel.data import ImageArg, GlobalArg, TemporaryVariable

    if isinstance(array, ImageArg):
        image_read = (
                "read_imagef(%s, loopy_sampler, (float%d)(%s))"
                % (array.name, array.dimensions,
                    ", ".join(
                        self.rec(idx, PREC_NONE, 'i')
                        for idx in expr.index[::-1])))

        if array.dtype == np.float32:
            return image_read+".x"

        if self.kernel.target.is_vector_dtype(array.dtype):
            return image_read
        if array.dtype == np.float64:
            return "as_double(%s.xy)" % image_read

        raise NotImplementedError(
                "non-floating-point images not supported for now")

    elif isinstance(array, (GlobalArg, TemporaryVariable)):
        if len(info.subscripts) != 0:
            (flat_subscript,) = info.subscripts
            code = self.parenthesize_if_needed(
                    "%s[%s]" % (
                        info.array_name,
                        self.rec(flat_subscript, PREC_NONE, 'i')),
                    enclosing_prec, PREC_CALL)

            if vec_member:
                code += "."+vec_member

            return code

        has_member = vec_member is not None

        if isinstance(array, GlobalArg):
            # unsubscripted global args are pointers
            if has_member:
                return "%s->%s" % (info.array_name, vec_member)
            return "*" + info.array_name

        # unsubscripted temp vars are scalars
        if has_member:
            return "%s.%s" % (info.array_name, vec_member)
        return info.array_name

    else:
        assert False