def map_call(self, expr, enclosing_prec, type_context):
    """Render a function call as C source text.

    The C name of the callee and, optionally, the argument dtypes expected
    by the target are obtained from ``self.kernel.mangle_function``.  If the
    mangler returns ``None`` and the callee is a plain variable, the
    variable name itself is used as the C name.

    :arg expr: the call; ``expr.function`` and ``expr.parameters`` are read.
    :arg enclosing_prec: not used by this method.
    :arg type_context: not used by this method (see the FIXME below).
    :returns: a string of the form ``name(arg, ...)``.
    :raises RuntimeError: if the mangler result has neither two nor three
        entries, or if no C name could be determined.
    """
    from pymbolic.primitives import Variable
    from pymbolic.mapper.stringifier import PREC_NONE

    identifier = expr.function
    if isinstance(identifier, Variable):
        identifier = identifier.name
        target_name = identifier
    else:
        target_name = None

    arg_dtypes = tuple(self.infer_type(arg) for arg in expr.parameters)
    rendered_args = None

    mangled = self.kernel.mangle_function(identifier, arg_dtypes)
    if mangled is not None:
        n_entries = len(mangled)
        if n_entries == 2:
            _, target_name = mangled
        elif n_entries == 3:
            # The mangler also dictates the dtype of each argument.
            _, target_name, arg_tgt_dtypes = mangled
            rendered_args = [
                self.rec(
                    arg, PREC_NONE,
                    dtype_to_type_context(self.kernel.target, tgt_dtype),
                    tgt_dtype)
                for arg, tgt_dtype in zip(expr.parameters, arg_tgt_dtypes)]
        else:
            raise RuntimeError("result of function mangler "
                    "for function '%s' not understood"
                    % identifier)

    from loopy.codegen import SeenFunction
    self.codegen_state.seen_functions.add(
        SeenFunction(identifier, target_name, arg_dtypes))

    if rendered_args is None:
        # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
        # propagate the type context here. But for many others, it does
        # not. Using the inferred type as a stopgap for now.
        rendered_args = []
        for arg, arg_dtype in zip(expr.parameters, arg_dtypes):
            arg_context = dtype_to_type_context(self.kernel.target, arg_dtype)
            rendered_args.append(
                self.rec(arg, PREC_NONE, type_context=arg_context))

    if target_name is None:
        raise RuntimeError("unable to find C name for function identifier '%s'"
                % identifier)

    return "%s(%s)" % (target_name, ", ".join(rendered_args))
def emit_call_insn(self, insn, target, expression_to_code_mapper):
    """Build the target call expression for a call instruction.

    The generated argument list is: the last assignee, then every input
    parameter in order, then the second extent of the shape stored in
    ``self.arg_id_to_descr[0]`` as a 32-bit integer.

    :returns: a tuple ``(call_expression, False)``.
    """
    assert self.is_ready_for_codegen()
    assert isinstance(insn, loopy.CallInstruction)

    # Inputs come first; the last assignee is appended as the final entry
    # and typed with the dtype registered for argument id 0.
    call_args = list(insn.expression.parameters)
    call_dtypes = [self.arg_id_to_dtype[pos] for pos in range(len(call_args))]
    call_args.append(insn.assignees[-1])
    call_dtypes.append(self.arg_id_to_dtype[0])

    mat_descr = self.arg_id_to_descr[0]

    lowered = []
    for arg, arg_dtype in zip(call_args, call_dtypes):
        arg_context = dtype_to_type_context(target, arg_dtype)
        lowered.append(
            expression_to_code_mapper(
                arg, PREC_NONE, arg_context, arg_dtype).expr)

    # Move the output (last lowered entry) to the front of the call.
    c_parameters = [lowered[-1]] + lowered[:-1]
    c_parameters.append(numpy.int32(mat_descr.shape[1]))  # n

    return var(self.name_in_target)(*c_parameters), False
def map_comparison(self, expr, type_context):
    """Rebuild a comparison with both operands mapped in one type context.

    The operand context is derived from the inferred dtype of
    ``expr.left - expr.right``; the incoming *type_context* is not used.

    :returns: a new expression of the same type as *expr*.
    """
    operand_dtype = self.infer_type(expr.left - expr.right)
    operand_context = dtype_to_type_context(self.kernel.target, operand_dtype)

    mapped_left = self.rec(expr.left, operand_context)
    mapped_right = self.rec(expr.right, operand_context)

    return type(expr)(mapped_left, expr.operator, mapped_right)
def emit_call_insn(self, insn, target, expression_to_code_mapper):
    """Build the target call expression for a call instruction.

    Inputs (``insn.expression.parameters``) and outputs
    (``insn.assignees``) are interleaved in the order given by
    ``self.access``, e.g. ``a, c = f(b, d)`` becomes ``f(a, b, c, d)``.
    Inputs left over after that are appended at the end.

    :returns: a tuple ``(call_expression, assignee_is_returned)``; the flag
        is always ``False``.
    """
    from loopy.expression import dtype_to_type_context
    from pymbolic.mapper.stringifier import PREC_NONE
    from pymbolic import var

    pending_reads = iter(insn.expression.parameters)
    pending_writes = iter(insn.assignees)

    # One slot per access descriptor: READ slots consume an input, all
    # other slots consume an assignee.
    call_args = [
        next(pending_reads) if mode is READ else next(pending_writes)
        for mode in self.access]

    # pass layer argument if needed
    call_args.extend(pending_reads)

    call_dtypes = tuple(
        expression_to_code_mapper.infer_type(arg) for arg in call_args)

    c_parameters = []
    for arg, arg_dtype in zip(call_args, call_dtypes):
        arg_context = dtype_to_type_context(target, arg_dtype)
        c_parameters.append(
            expression_to_code_mapper(
                arg, PREC_NONE, arg_context, arg_dtype).expr)

    return var(self.name_in_target)(*c_parameters), False
def emit_tuple_assignment(self, codegen_state, insn):
    """Emit one assignment per (assignee, parameter) pair of *insn*.

    Each right-hand side is generated with the dtype of the corresponding
    assignee variable as the needed dtype.

    :returns: the result of ``cgen.block_if_necessary`` applied to the list
        of generated ``cgen.Assign`` statements.
    """
    from cgen import Assign, block_if_necessary
    from loopy.expression import dtype_to_type_context

    ecm = codegen_state.expression_to_code_mapper

    def make_assignment(position, assignee, parameter):
        # The left-hand side is generated before the right-hand side.
        lhs_code = ecm(assignee, prec=PREC_NONE, type_context=None)

        var_name = insn.assignee_var_names()[position]
        lhs_dtype = codegen_state.kernel.get_var_descriptor(var_name).dtype

        rhs_code = ecm(
            parameter, prec=PREC_NONE,
            type_context=dtype_to_type_context(
                codegen_state.kernel.target, lhs_dtype),
            needed_dtype=lhs_dtype)
        return Assign(lhs_code, rhs_code)

    pairs = zip(insn.assignees, insn.expression.parameters)
    return block_if_necessary([
        make_assignment(position, assignee, parameter)
        for position, (assignee, parameter) in enumerate(pairs)])
def emit_call_insn(self, insn, target, expression_to_code_mapper):
    """Generate the call expression for *insn* with reordered arguments.

    ``self.access`` determines, slot by slot, whether the next argument is
    taken from the instruction's inputs or from its assignees, so that
    e.g. ``a, c = f(b, d)`` is emitted as ``f(a, b, c, d)``.

    :returns: ``(call_expression, False)``; the second entry says that the
        assignee is not the return value of the call.
    """
    from loopy.expression import dtype_to_type_context
    from pymbolic.mapper.stringifier import PREC_NONE
    from pymbolic import var

    input_iter = iter(insn.expression.parameters)
    output_iter = iter(insn.assignees)

    ordered_args = []
    for access_mode in self.access:
        source = input_iter if access_mode is READ else output_iter
        ordered_args.append(next(source))

    # pass layer argument if needed
    ordered_args += list(input_iter)

    ordered_dtypes = tuple(
        expression_to_code_mapper.infer_type(arg) for arg in ordered_args)

    def lower(arg, arg_dtype):
        return expression_to_code_mapper(
            arg, PREC_NONE,
            dtype_to_type_context(target, arg_dtype),
            arg_dtype).expr

    c_parameters = [
        lower(arg, arg_dtype)
        for arg, arg_dtype in zip(ordered_args, ordered_dtypes)]

    assignee_is_returned = False
    call = var(self.name_in_target)(*c_parameters)
    return call, assignee_is_returned
def generate_array_literal(codegen_state, array, value):
    """Return a brace-delimited initializer string for *array*.

    *value* is flattened with ``generate_linearized_array`` and every entry
    is passed through the expression-to-code mapper using the type context
    and dtype of *array*.

    :returns: a string of the form ``{ e0, e1, ... }``.
    """
    entries = generate_linearized_array(array, value)
    ecm = codegen_state.expression_to_code_mapper

    from pymbolic.mapper.stringifier import PREC_NONE
    from loopy.expression import dtype_to_type_context

    entry_context = dtype_to_type_context(
        codegen_state.kernel.target, array.dtype)

    rendered = (
        ecm(entry, PREC_NONE, entry_context, array.dtype)
        for entry in entries)
    return "{ %s }" % ", ".join(rendered)
def map_comparison(self, expr, enclosing_prec, type_context):
    """Render a comparison as ``left op right``, parenthesized if needed.

    Both operands are rendered in the type context derived from the
    inferred dtype of ``expr.left - expr.right``; the incoming
    *type_context* is not used.
    """
    from pymbolic.mapper.stringifier import PREC_COMPARISON

    operand_dtype = self.infer_type(expr.left - expr.right)
    operand_context = dtype_to_type_context(self.kernel.target, operand_dtype)

    left_code = self.rec(expr.left, PREC_COMPARISON, operand_context)
    right_code = self.rec(expr.right, PREC_COMPARISON, operand_context)
    comparison = "%s %s %s" % (left_code, expr.operator, right_code)

    return self.parenthesize_if_needed(
        comparison, enclosing_prec, PREC_COMPARISON)
def map_comparison(self, expr, enclosing_prec, type_context):
    """Generate code for a comparison expression.

    The operands share one type context, computed from the inferred dtype
    of their difference.  The result is wrapped in parentheses by
    ``self.parenthesize_if_needed`` according to *enclosing_prec*.
    """
    from pymbolic.mapper.stringifier import PREC_COMPARISON

    inner_context = dtype_to_type_context(
        self.kernel.target,
        self.infer_type(expr.left - expr.right))

    pieces = [self.rec(expr.left, PREC_COMPARISON, inner_context)]
    pieces.append(expr.operator)
    pieces.append(self.rec(expr.right, PREC_COMPARISON, inner_context))

    return self.parenthesize_if_needed(
        "%s %s %s" % tuple(pieces),
        enclosing_prec,
        PREC_COMPARISON)
def generate_array_literal(codegen_state, array, value):
    """Wrap the linearized contents of *array* in an array-literal expression.

    Every entry of the flattened data is mapped with ``ecm.map_constant``
    in the type context of ``array.dtype``.

    :returns: a ``CExpression`` holding an ``ArrayLiteral`` of the mapped
        entries.
    """
    entries = generate_linearized_array(array, value)
    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context
    from loopy.symbolic import ArrayLiteral

    entry_context = dtype_to_type_context(
        codegen_state.kernel.target, array.dtype)

    # The C mapper is fetched before the constants are mapped.
    c_mapper = codegen_state.ast_builder.get_c_expression_to_code_mapper()
    mapped_entries = tuple(
        ecm.map_constant(entry, entry_context) for entry in entries)

    return CExpression(c_mapper, ArrayLiteral(mapped_entries))
def emit_assignment(self, codegen_state, insn):
    """Emit code for a single-assignee assignment, atomic or plain.

    If *insn* carries no atomicity entry for the assigned variable, a plain
    ``cgen.Assign`` is returned.  Otherwise the work is delegated to the
    AST builder's ``emit_atomic_init`` or ``emit_atomic_update`` and the
    left-hand-side dtype is recorded in
    ``codegen_state.seen_atomic_dtypes``.

    :raises ValueError: if the atomicity entry is neither an ``AtomicInit``
        nor an ``AtomicUpdate``.
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    (assignee_var_name,) = insn.assignee_var_names()
    lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    # At most one atomicity entry may refer to the assigned variable.
    lhs_atomicity = None
    if insn.atomicity is not None:
        matching = [
            entry for entry in insn.atomicity
            if entry.var_name == assignee_var_name]
        assert len(matching) <= 1
        if matching:
            (lhs_atomicity,) = matching

    from loopy.kernel.data import AtomicInit, AtomicUpdate
    from loopy.expression import dtype_to_type_context

    lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)

    if lhs_atomicity is None:
        from cgen import Assign
        rhs_code = ecm(
            insn.expression, prec=PREC_NONE,
            type_context=rhs_type_context,
            needed_dtype=lhs_dtype)
        return Assign(lhs_code, rhs_code)

    if isinstance(lhs_atomicity, AtomicInit):
        emitter_name = "emit_atomic_init"
    elif isinstance(lhs_atomicity, AtomicUpdate):
        emitter_name = "emit_atomic_update"
    else:
        raise ValueError("unexpected lhs atomicity type: %s"
                % type(lhs_atomicity).__name__)

    codegen_state.seen_atomic_dtypes.add(lhs_dtype)
    emitter = getattr(codegen_state.ast_builder, emitter_name)
    return emitter(
        codegen_state, lhs_atomicity, lhs_var,
        insn.assignee, insn.expression,
        lhs_dtype, rhs_type_context)
def generate_array_literal(codegen_state, array, value):
    """Build a ``CExpression`` array literal from *array* and *value*.

    The data comes from ``generate_linearized_array(array, value)``; each
    element is converted with the expression-to-code mapper's
    ``map_constant`` in the type context belonging to ``array.dtype``.
    """
    from loopy.expression import dtype_to_type_context
    from loopy.symbolic import ArrayLiteral

    flat_data = generate_linearized_array(array, value)
    ecm = codegen_state.expression_to_code_mapper
    literal_context = dtype_to_type_context(
        codegen_state.kernel.target, array.dtype)

    c_mapper = codegen_state.ast_builder.get_c_expression_to_code_mapper()

    constants = []
    for element in flat_data:
        constants.append(ecm.map_constant(element, literal_context))

    return CExpression(c_mapper, ArrayLiteral(tuple(constants)))
def emit_call_insn(self, insn, target, expression_to_code_mapper):
    """Generate the call to ``self.subkernel`` for a call instruction.

    Starts from the instruction's input parameters and inserts each
    assignee at the position of the matching output-only subkernel
    argument.  Output arguments are numbered ``-1, -2, ...`` in
    ``self.arg_id_to_dtype``.

    :returns: a tuple ``(call_expression, False)``.
    :raises NotImplementedError: if *target* is not a ``CFamilyTarget``.
    """
    from loopy.target.c import CFamilyTarget
    if not isinstance(target, CFamilyTarget):
        raise NotImplementedError()

    from loopy.kernel.instruction import CallInstruction

    assert self.is_ready_for_codegen()
    assert isinstance(insn, CallInstruction)

    ecm = expression_to_code_mapper
    assignees = insn.assignees

    call_args = list(insn.expression.parameters)
    call_dtypes = [self.arg_id_to_dtype[pos] for pos in range(len(call_args))]

    # The result is not used below; the call is kept as in the original.
    kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel)

    # insert the assignees at the required positions
    output_id = -1
    for position, arg in enumerate(self.subkernel.args):
        if not arg.is_output:
            continue

        if not arg.is_input:
            # output_id -1 maps to assignees[0], -2 to assignees[1], ...
            call_args.insert(position, assignees[-output_id - 1])
            call_dtypes.insert(position, self.arg_id_to_dtype[output_id])

        output_id -= 1

    # no type casting in array calls
    from loopy.expression import dtype_to_type_context
    from pymbolic.mapper.stringifier import PREC_NONE
    from pymbolic import var

    tgt_parameters = []
    for arg, arg_dtype in zip(call_args, call_dtypes):
        arg_context = dtype_to_type_context(target, arg_dtype)
        tgt_parameters.append(
            ecm(arg, PREC_NONE, arg_context, arg_dtype).expr)

    return var(self.subkernel.name)(*tgt_parameters), False
def emit_tuple_assignment(self, codegen_state, insn):
    """Turn a tuple assignment into a sequence of scalar assignments.

    The i-th assignee receives the i-th parameter of ``insn.expression``.
    The right-hand side is generated with the assignee variable's dtype as
    both the type-context source and the needed dtype.

    :returns: ``cgen.block_if_necessary`` of the generated assignments.
    """
    from cgen import Assign, block_if_necessary
    from loopy.expression import dtype_to_type_context

    ecm = codegen_state.expression_to_code_mapper
    statements = []

    position = 0
    for assignee, parameter in zip(insn.assignees, insn.expression.parameters):
        lhs_code = ecm(assignee, prec=PREC_NONE, type_context=None)

        lhs_name = insn.assignee_var_names()[position]
        lhs_dtype = codegen_state.kernel.get_var_descriptor(lhs_name).dtype

        rhs_context = dtype_to_type_context(
            codegen_state.kernel.target, lhs_dtype)
        rhs_code = ecm(
            parameter, prec=PREC_NONE,
            type_context=rhs_context, needed_dtype=lhs_dtype)

        statements.append(Assign(lhs_code, rhs_code))
        position += 1

    return block_if_necessary(statements)
def emit_call(self, expression_to_code_mapper, expression, target):
    """Build the call expression for a function with a single result.

    Each parameter is processed with ``expression_to_code_mapper.rec``
    using the dtype registered for its position in
    ``self.arg_id_to_dtype``.

    :returns: ``var(self.name_in_target)`` applied to the processed
        parameters.
    """
    assert self.is_ready_for_codegen()

    # must have single assignee
    n_inputs = len(self.arg_id_to_dtype) - 1
    assert len(expression.parameters) == n_inputs

    arg_dtypes = tuple(self.arg_id_to_dtype[arg_id] for arg_id in range(n_inputs))

    # The inferred dtypes do not influence the generated code; inference is
    # still run for every parameter, as in the original implementation.
    par_dtypes = tuple(
        expression_to_code_mapper.infer_type(par)
        for par in expression.parameters)

    from loopy.expression import dtype_to_type_context

    # processing the parameters with the required dtypes
    processed_parameters = []
    for par, _, tgt_dtype in zip(expression.parameters, par_dtypes, arg_dtypes):
        tgt_context = dtype_to_type_context(target, tgt_dtype)
        processed_parameters.append(
            expression_to_code_mapper.rec(par, tgt_context, tgt_dtype))

    from pymbolic import var
    return var(self.name_in_target)(*processed_parameters)
def generate_assignment_instruction_code(codegen_state, insn):
    """Generate code for an assignment instruction, with optional tracing.

    Plain assignments go through ``ast_builder.emit_assignment`` and atomic
    updates through ``ast_builder.generate_atomic_update``.  When one of
    the kernel's trace options is set, the result is wrapped in a block
    together with a ``printf`` statement.

    :raises Unvectorizable: under vectorization, for atomic instructions,
        for a scalar left-hand side with a vector right-hand side, or when
        tracing a vector assignment.
    :raises RuntimeError: if the assignee is not a supported lvalue.
    :raises NotImplementedError: for ``AtomicInit``.
    :raises ValueError: for an unknown atomicity type.
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context, VectorizabilityChecker

    # {{{ vectorization handling

    if codegen_state.vectorization_info:
        if insn.atomicity:
            raise Unvectorizable("atomic operation")

        vec_info = codegen_state.vectorization_info
        check_vectorizable = VectorizabilityChecker(
            kernel, vec_info.iname, vec_info.length)

        # *is_vector* is only bound while vectorizing; the tracing code
        # below tests ``vectorization_info`` before reading it.
        is_vector = check_vectorizable(insn.assignee)
        rhs_vectorizes = check_vectorizable(insn.expression)

        if rhs_vectorizes and not is_vector:
            raise Unvectorizable("LHS is scalar, RHS is vector, cannot assign")

        del rhs_vectorizes

    # }}}

    from pymbolic.primitives import Variable, Subscript
    from loopy.symbolic import LinearSubscript

    lhs = insn.assignee
    if isinstance(lhs, Variable):
        assignee_var_name, assignee_indices = lhs.name, ()
    elif isinstance(lhs, Subscript):
        assignee_var_name, assignee_indices = lhs.aggregate.name, lhs.index_tuple
    elif isinstance(lhs, LinearSubscript):
        assignee_var_name, assignee_indices = lhs.aggregate.name, (lhs.index,)
    else:
        raise RuntimeError("invalid lvalue '%s'" % lhs)

    lhs_var = kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    # At most one atomicity entry may refer to the assigned variable.
    lhs_atomicity = None
    if insn.atomicity is not None:
        matching = [
            entry for entry in insn.atomicity
            if entry.var_name == assignee_var_name]
        assert len(matching) <= 1
        if matching:
            (lhs_atomicity,) = matching

    from loopy.kernel.data import AtomicInit, AtomicUpdate

    lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)

    if lhs_atomicity is None:
        result = codegen_state.ast_builder.emit_assignment(
            codegen_state,
            lhs_code,
            ecm(insn.expression, prec=PREC_NONE,
                type_context=rhs_type_context,
                needed_dtype=lhs_dtype))
    elif isinstance(lhs_atomicity, AtomicInit):
        raise NotImplementedError("atomic init")
    elif isinstance(lhs_atomicity, AtomicUpdate):
        codegen_state.seen_atomic_dtypes.add(lhs_dtype)
        result = codegen_state.ast_builder.generate_atomic_update(
            kernel, codegen_state, lhs_atomicity, lhs_var,
            insn.assignee, insn.expression,
            lhs_dtype, rhs_type_context)
    else:
        raise ValueError("unexpected lhs atomicity type: %s"
                % type(lhs_atomicity).__name__)

    # {{{ tracing

    options = kernel.options
    if not (options.trace_assignments or options.trace_assignment_values):
        return result

    if codegen_state.vectorization_info and is_vector:
        raise Unvectorizable("tracing does not support vectorization")

    from cgen import Statement as S  # noqa

    gs, ls = kernel.get_grid_size_upper_bounds()
    n_group_axes = len(gs)
    n_local_axes = len(ls)

    group_fields = ", ".join("gid%d=%%d" % i for i in range(n_group_axes))
    local_fields = ", ".join("lid%d=%%d" % i for i in range(n_local_axes))
    printf_format = "%s.%s[%s][%s]: %s" % (
        kernel.name, insn.id, group_fields, local_fields, assignee_var_name)

    printf_args = ["gid(%d)" % i for i in range(n_group_axes)]
    printf_args += ["lid(%d)" % i for i in range(n_local_axes)]

    if assignee_indices:
        printf_format += "[%s]" % ",".join(len(assignee_indices) * ["%d"])
        printf_args += [
            ecm(index, prec=PREC_NONE, type_context="i")
            for index in assignee_indices]

    if options.trace_assignment_values:
        dtype_kind = lhs_dtype.numpy_dtype.kind
        if dtype_kind == "i":
            printf_format += " = %d"
            printf_args.append(lhs_code)
        elif dtype_kind == "f":
            printf_format += " = %g"
            printf_args.append(lhs_code)
        elif dtype_kind == "c":
            printf_format += " = %g + %gj"
            printf_args.append("(%s).x" % lhs_code)
            printf_args.append("(%s).y" % lhs_code)

    printf_args_str = ", " + ", ".join(printf_args) if printf_args else ""
    printf_insn = S("printf(\"%s\\n\"%s)" % (printf_format, printf_args_str))

    from cgen import Block
    if options.trace_assignment_values:
        return Block([result, printf_insn])

    # print first, execute later -> helps find segfaults
    return Block([printf_insn, result])

    # }}}
def emit_multiple_assignment(self, codegen_state, insn):
    """Emit a call whose results are assigned to several left-hand sides.

    The first assignee receives the return value of the call.  Every
    further assignee is passed to the function as an additional trailing
    argument wrapped in ``var("&")(...)``.  If the mangled function has no
    result dtypes at all, a bare expression statement is emitted.

    :returns: a ``cgen.ExpressionStatement`` (no results) or a
        ``cgen.Assign``.
    :raises RuntimeError: if the kernel's function manglers do not know the
        function.
    :raises LoopyError: if the dtype of a second or later assignee differs
        from the corresponding result dtype of the function.
    """
    ecm = codegen_state.expression_to_code_mapper

    from pymbolic.primitives import Variable
    from pymbolic.mapper.stringifier import PREC_NONE
    from pymbolic import var

    func_id = insn.expression.function
    parameters = insn.expression.parameters

    if isinstance(func_id, Variable):
        func_id = func_id.name

    assignee_var_descriptors = [
        codegen_state.kernel.get_var_descriptor(a)
        for a in insn.assignee_var_names()]

    par_dtypes = tuple(ecm.infer_type(par) for par in parameters)

    mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes)
    if mangle_result is None:
        raise RuntimeError(
            "function '%s' unknown--"
            "maybe you need to register a function mangler?" % func_id)

    assert mangle_result.arg_dtypes is not None

    from loopy.expression import dtype_to_type_context
    c_parameters = [
        ecm(par, PREC_NONE,
            dtype_to_type_context(self.target, tgt_dtype),
            tgt_dtype).expr
        for par, tgt_dtype in zip(parameters, mangle_result.arg_dtypes)]

    from loopy.codegen import SeenFunction
    codegen_state.seen_functions.add(
        SeenFunction(func_id,
            mangle_result.target_name,
            mangle_result.arg_dtypes))

    # The loop covers assignees[1:], i.e. the second, third, ... left-hand
    # side, so the 1-based position reported in the message starts at 2.
    for lhs_position, (a, tgt_dtype) in enumerate(
            zip(insn.assignees[1:], mangle_result.result_dtypes[1:]),
            start=2):
        if tgt_dtype != ecm.infer_type(a):
            raise LoopyError("type mismatch in %d'th (1-based) left-hand "
                    "side of instruction '%s'" % (lhs_position, insn.id))
        c_parameters.append(
            # TODO Yuck: The "where-at function": &(...)
            var("&")(
                ecm(a, PREC_NONE,
                    dtype_to_type_context(self.target, tgt_dtype),
                    tgt_dtype).expr))

    result = var(mangle_result.target_name)(*c_parameters)

    # In case of no assignees, we are done
    if len(mangle_result.result_dtypes) == 0:
        from cgen import ExpressionStatement
        return ExpressionStatement(
            CExpression(self.get_c_expression_to_code_mapper(), result))

    result = ecm.wrap_in_typecast(
        mangle_result.result_dtypes[0],
        assignee_var_descriptors[0].dtype,
        result)

    lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None)

    from cgen import Assign
    return Assign(
        lhs_code,
        CExpression(self.get_c_expression_to_code_mapper(), result))
def emit_assignment(self, codegen_state, insn):
    """Emit an assignment, or a ``streaming_store`` call when requested.

    Instructions tagged ``"!streaming_store"`` are emitted as
    ``streaming_store(array + offset, rhs)``.  The array subscript must
    contain the iname tagged with local axis 0 exactly once as a bare
    summand; that summand is dropped from the emitted offset.  All other
    instructions become a plain ``cgen.Assign``.

    :raises NotImplementedError: if *insn* has atomicity information.
    :raises LoopyError: if the streaming-store constraints are violated
        (unsupported array type, not exactly one subscript, the local axis 0
        iname missing, repeated or nested in a term, or a short-vector
        access).
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    assignee_var_name, = insn.assignee_var_names()

    lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    if insn.atomicity:
        raise NotImplementedError("atomic ops in ISPC")

    from loopy.expression import dtype_to_type_context
    from pymbolic.mapper.stringifier import PREC_NONE

    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
    rhs_code = ecm(insn.expression, prec=PREC_NONE,
            type_context=rhs_type_context,
            needed_dtype=lhs_dtype)

    lhs = insn.assignee

    # {{{ handle streaming stores

    if "!streaming_store" in insn.tags:
        ary = ecm.find_array(lhs)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        from loopy.symbolic import simplify_using_aff
        index_tuple = tuple(
            simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)

        access_info = get_access_info(kernel.target, ary, index_tuple,
                lambda expr: evaluate(expr, codegen_state.var_subst_map),
                codegen_state.vectorization_info)

        from loopy.kernel.data import ArrayArg, TemporaryVariable

        if not isinstance(ary, (ArrayArg, TemporaryVariable)):
            # Fixed: '__name' is not an attribute of classes, so the
            # intended LoopyError was masked by an AttributeError.
            raise LoopyError("array type not supported in ISPC: %s"
                    % type(ary).__name__)

        if len(access_info.subscripts) != 1:
            raise LoopyError("streaming stores must have a subscript")
        subscript, = access_info.subscripts

        from pymbolic.primitives import Sum, flattened_sum, Variable
        if isinstance(subscript, Sum):
            terms = subscript.children
        else:
            # Fixed: a subscript that is not a sum is its own single term.
            # The previous '(subscript.children,)' fails for a bare
            # variable and wraps the wrong object for other node types.
            terms = (subscript,)

        new_terms = []

        from loopy.kernel.data import LocalIndexTag, filter_iname_tags_by_type
        from loopy.symbolic import get_dependencies

        saw_l0 = False
        for term in terms:
            if (isinstance(term, Variable)
                    and kernel.iname_tags_of_type(term.name, LocalIndexTag)):
                tag, = kernel.iname_tags_of_type(
                    term.name, LocalIndexTag, min_num=1, max_num=1)
                if tag.axis == 0:
                    if saw_l0:
                        raise LoopyError(
                                "streaming store must have stride 1 in "
                                "local index, got: %s" % subscript)
                    saw_l0 = True
                    # The local axis 0 term is left out of the offset.
                    continue
            else:
                # A composite term must not depend on the local axis 0
                # iname at all.
                for dep in get_dependencies(term):
                    if filter_iname_tags_by_type(
                            kernel.iname_to_tags.get(dep, []), LocalIndexTag):
                        tag, = filter_iname_tags_by_type(
                            kernel.iname_to_tags.get(dep, []), LocalIndexTag, 1)
                        if tag.axis == 0:
                            raise LoopyError(
                                    "streaming store must have stride 1 in "
                                    "local index, got: %s" % subscript)

            new_terms.append(term)

        if not saw_l0:
            raise LoopyError("streaming store must have stride 1 in "
                    "local index, got: %s" % subscript)

        if access_info.vector_index is not None:
            raise LoopyError("streaming store may not use a short-vector "
                    "data type")

        # Fixed: the 'for' clauses were reversed, so 'dep' was read before
        # it was bound by this expression (or was left over from the loop
        # above).
        # NOTE(review): every dependency of the right-hand side is passed
        # to kernel.iname_tags(); confirm that it accepts names that are
        # not inames.
        rhs_has_programindex = any(
            isinstance(tag, LocalIndexTag) and tag.axis == 0
            for dep in get_dependencies(insn.expression)
            for tag in kernel.iname_tags(dep))

        if not rhs_has_programindex:
            rhs_code = "broadcast(%s, 0)" % rhs_code

        from cgen import Statement
        return Statement(
            "streaming_store(%s + %s, %s)"
            % (
                access_info.array_name,
                ecm(flattened_sum(new_terms), PREC_NONE, 'i'),
                rhs_code))

    # }}}

    from cgen import Assign
    return Assign(ecm(lhs, prec=PREC_NONE, type_context=None), rhs_code)
def map_call(self, expr, enclosing_prec, type_context):
    """Generate code for a function call.

    The built-in pseudo-functions ``indexof`` and ``indexof_vec`` are
    handled first: they take one subscript argument and return the index
    expression computed by ``get_access_info`` (no string is built for
    them).  All other calls are resolved through
    ``self.kernel.mangle_function`` and rendered as ``name(arg, ...)``.

    :raises LoopyError: for invalid use of ``indexof``/``indexof_vec``
        (wrong argument count, non-subscript argument, image arrays).
    :raises RuntimeError: if the mangler result has neither two nor three
        entries, or if no C name could be determined.
    """
    from pymbolic.primitives import Variable, Subscript
    from pymbolic.mapper.stringifier import PREC_NONE

    identifier = expr.function

    # {{{ implement indexof, indexof_vec

    # A callee without a 'name' attribute cannot be one of the built-ins.
    # Let it fall through to the mangler (which handles non-Variable
    # callees below) instead of failing with an AttributeError here.
    builtin_name = getattr(identifier, "name", None)

    if builtin_name in ["indexof", "indexof_vec"]:
        if len(expr.parameters) != 1:
            raise LoopyError("%s takes exactly one argument" % builtin_name)
        arg, = expr.parameters
        if not isinstance(arg, Subscript):
            raise LoopyError(
                "argument to %s must be a subscript" % builtin_name)

        ary = self.find_array(arg)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate
        access_info = get_access_info(self.kernel.target, ary, arg.index,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

        from loopy.kernel.data import ImageArg
        if isinstance(ary, ImageArg):
            raise LoopyError("%s does not support images" % builtin_name)

        if builtin_name == "indexof":
            return access_info.subscripts[0]
        elif builtin_name == "indexof_vec":
            from loopy.kernel.array import VectorArrayDimTag
            # The last axis carrying a vector dim tag wins.
            ivec = None
            for iaxis, dim_tag in enumerate(ary.dim_tags):
                if isinstance(dim_tag, VectorArrayDimTag):
                    ivec = iaxis

            if ivec is None:
                return access_info.subscripts[0]
            else:
                return (
                    access_info.subscripts[0]*ary.shape[ivec]
                    + access_info.vector_index)

        else:
            raise RuntimeError("should not get here")

    # }}}

    c_name = None
    if isinstance(identifier, Variable):
        identifier = identifier.name
        c_name = identifier

    par_dtypes = tuple(self.infer_type(par) for par in expr.parameters)

    str_parameters = None

    mangle_result = self.kernel.mangle_function(identifier, par_dtypes)
    if mangle_result is not None:
        if len(mangle_result) == 2:
            result_dtype, c_name = mangle_result
        elif len(mangle_result) == 3:
            # The mangler also dictates the dtype of each argument.
            result_dtype, c_name, arg_tgt_dtypes = mangle_result

            str_parameters = [
                self.rec(par, PREC_NONE,
                    dtype_to_type_context(self.kernel.target, tgt_dtype),
                    tgt_dtype)
                for par, tgt_dtype in zip(expr.parameters, arg_tgt_dtypes)]
        else:
            raise RuntimeError("result of function mangler "
                    "for function '%s' not understood"
                    % identifier)

    from loopy.codegen import SeenFunction
    self.codegen_state.seen_functions.add(
        SeenFunction(identifier, c_name, par_dtypes))

    if str_parameters is None:
        # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
        # propagate the type context here. But for many others, it does
        # not. Using the inferred type as a stopgap for now.
        str_parameters = [
            self.rec(par, PREC_NONE,
                type_context=dtype_to_type_context(
                    self.kernel.target, par_dtype))
            for par, par_dtype in zip(expr.parameters, par_dtypes)]

    if c_name is None:
        raise RuntimeError("unable to find C name for function identifier '%s'"
                % identifier)

    return "%s(%s)" % (c_name, ", ".join(str_parameters))
def emit_multiple_assignment(self, codegen_state, insn):
    """Emit a call whose results are assigned to several left-hand sides.

    The first assignee receives the (type-cast) return value of the call.
    Every further assignee is passed to the function as an additional
    trailing argument of the form ``&(...)``.

    :returns: a ``cgen.Assign`` of the call to the first assignee.
    :raises RuntimeError: if the kernel's function manglers do not know the
        function.
    :raises LoopyError: if the dtype of a second or later assignee differs
        from the corresponding result dtype of the function.
    """
    ecm = codegen_state.expression_to_code_mapper

    from pymbolic.primitives import Variable
    from pymbolic.mapper.stringifier import PREC_NONE

    func_id = insn.expression.function
    parameters = insn.expression.parameters

    if isinstance(func_id, Variable):
        func_id = func_id.name

    assignee_var_descriptors = [
        codegen_state.kernel.get_var_descriptor(a)
        for a in insn.assignee_var_names()]

    par_dtypes = tuple(ecm.infer_type(par) for par in parameters)

    mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes)
    if mangle_result is None:
        raise RuntimeError("function '%s' unknown--"
                "maybe you need to register a function mangler?"
                % func_id)

    assert mangle_result.arg_dtypes is not None

    from loopy.expression import dtype_to_type_context
    str_parameters = [
        ecm(par, PREC_NONE,
            dtype_to_type_context(self.target, tgt_dtype),
            tgt_dtype)
        for par, tgt_dtype in zip(parameters, mangle_result.arg_dtypes)]

    from loopy.codegen import SeenFunction
    codegen_state.seen_functions.add(
        SeenFunction(func_id,
            mangle_result.target_name,
            mangle_result.arg_dtypes))

    # The loop covers assignees[1:], i.e. the second, third, ... left-hand
    # side, so the 1-based position reported in the message starts at 2.
    for lhs_position, (a, tgt_dtype) in enumerate(
            zip(insn.assignees[1:], mangle_result.result_dtypes[1:]),
            start=2):
        if tgt_dtype != ecm.infer_type(a):
            raise LoopyError("type mismatch in %d'th (1-based) left-hand "
                    "side of instruction '%s'" % (lhs_position, insn.id))
        str_parameters.append(
            "&(%s)" % ecm(a, PREC_NONE,
                dtype_to_type_context(self.target, tgt_dtype),
                tgt_dtype))

    result = "%s(%s)" % (mangle_result.target_name, ", ".join(str_parameters))

    result = ecm.wrap_in_typecast(
        mangle_result.result_dtypes[0],
        assignee_var_descriptors[0].dtype,
        result)

    lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None)

    from cgen import Assign
    return Assign(lhs_code, result)
def map_call(self, expr, enclosing_prec, type_context):
    """Generate code for a function call.

    The built-in pseudo-functions ``indexof`` and ``indexof_vec`` are
    handled first: they take one subscript argument and return the index
    expression computed by ``get_access_info`` (no string is built for
    them).  All other calls are resolved through
    ``self.kernel.mangle_function`` and rendered as ``name(arg, ...)``.

    A ``LoopyWarning`` is issued when the mangler does not supply argument
    dtypes; the inferred parameter dtypes are used in that case.

    :raises LoopyError: for invalid use of ``indexof``/``indexof_vec``, or
        if the function does not have exactly one result dtype.
    :raises RuntimeError: if the function is unknown to the manglers.
    """
    from pymbolic.primitives import Variable, Subscript
    from pymbolic.mapper.stringifier import PREC_NONE

    identifier = expr.function

    # {{{ implement indexof, indexof_vec

    # A callee without a 'name' attribute cannot be one of the built-ins.
    # Let it fall through to the mangler (which handles non-Variable
    # callees below) instead of failing with an AttributeError here.
    builtin_name = getattr(identifier, "name", None)

    if builtin_name in ["indexof", "indexof_vec"]:
        if len(expr.parameters) != 1:
            raise LoopyError("%s takes exactly one argument" % builtin_name)
        arg, = expr.parameters
        if not isinstance(arg, Subscript):
            raise LoopyError(
                "argument to %s must be a subscript" % builtin_name)

        ary = self.find_array(arg)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate
        access_info = get_access_info(self.kernel.target, ary, arg.index,
                lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

        from loopy.kernel.data import ImageArg
        if isinstance(ary, ImageArg):
            raise LoopyError("%s does not support images" % builtin_name)

        if builtin_name == "indexof":
            return access_info.subscripts[0]
        elif builtin_name == "indexof_vec":
            from loopy.kernel.array import VectorArrayDimTag
            # The last axis carrying a vector dim tag wins.
            ivec = None
            for iaxis, dim_tag in enumerate(ary.dim_tags):
                if isinstance(dim_tag, VectorArrayDimTag):
                    ivec = iaxis

            if ivec is None:
                return access_info.subscripts[0]
            else:
                return (
                    access_info.subscripts[0]*ary.shape[ivec]
                    + access_info.vector_index)

        else:
            raise RuntimeError("should not get here")

    # }}}

    if isinstance(identifier, Variable):
        identifier = identifier.name

    par_dtypes = tuple(self.infer_type(par) for par in expr.parameters)

    mangle_result = self.kernel.mangle_function(
        identifier, par_dtypes,
        ast_builder=self.codegen_state.ast_builder)

    if mangle_result is None:
        raise RuntimeError("function '%s' unknown--"
                "maybe you need to register a function mangler?"
                % identifier)

    if len(mangle_result.result_dtypes) != 1:
        raise LoopyError("functions with more or fewer than one return value "
                "may not be used in an expression")

    if mangle_result.arg_dtypes is not None:
        str_parameters = [
            self.rec(par, PREC_NONE,
                dtype_to_type_context(self.kernel.target, tgt_dtype),
                tgt_dtype)
            for par, tgt_dtype in zip(
                expr.parameters, mangle_result.arg_dtypes)]

    else:
        # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
        # propagate the type context here. But for many others, it does
        # not. Using the inferred type as a stopgap for now.
        str_parameters = [
            self.rec(par, PREC_NONE,
                type_context=dtype_to_type_context(
                    self.kernel.target, par_dtype))
            for par, par_dtype in zip(expr.parameters, par_dtypes)]

        from warnings import warn
        warn("Calling function '%s' with unknown C signature--"
                "return CallMangleInfo.arg_dtypes"
                % identifier, LoopyWarning)

    from loopy.codegen import SeenFunction
    self.codegen_state.seen_functions.add(
        SeenFunction(identifier,
            mangle_result.target_name,
            mangle_result.arg_dtypes or par_dtypes))

    return "%s(%s)" % (mangle_result.target_name, ", ".join(str_parameters))
def generate_assignment_instruction_code(codegen_state, insn):
    """Generate the statement(s) implementing an assignment instruction.

    A non-atomic assignment is emitted via ``ast_builder.emit_assignment``
    and an ``AtomicUpdate`` via ``ast_builder.generate_atomic_update``.  If
    ``kernel.options.trace_assignments`` or
    ``kernel.options.trace_assignment_values`` is set, a ``printf``
    statement is placed before the assignment, or after it when values are
    traced.

    :raises Unvectorizable: under vectorization, for atomic instructions,
        for a scalar left-hand side with a vector right-hand side, or when
        tracing a vector assignment.
    :raises RuntimeError: if the assignee is not a supported lvalue.
    :raises NotImplementedError: for ``AtomicInit``.
    :raises ValueError: for an unknown atomicity type.
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context, VectorizabilityChecker

    # {{{ vectorization handling

    if codegen_state.vectorization_info:
        if insn.atomicity:
            raise Unvectorizable("atomic operation")

        vinfo = codegen_state.vectorization_info
        vcheck = VectorizabilityChecker(kernel, vinfo.iname, vinfo.length)

        # Only the vector-ness of the left-hand side is kept for later.
        is_vector = vcheck(insn.assignee)
        if vcheck(insn.expression) and not is_vector:
            raise Unvectorizable(
                "LHS is scalar, RHS is vector, cannot assign")

    # }}}

    from pymbolic.primitives import Variable, Subscript
    from loopy.symbolic import LinearSubscript

    lhs = insn.assignee
    if isinstance(lhs, Variable):
        assignee_var_name = lhs.name
        assignee_indices = ()
    elif isinstance(lhs, (Subscript, LinearSubscript)):
        assignee_var_name = lhs.aggregate.name
        if isinstance(lhs, Subscript):
            assignee_indices = lhs.index_tuple
        else:
            assignee_indices = (lhs.index,)
    else:
        raise RuntimeError("invalid lvalue '%s'" % lhs)

    lhs_var = kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    if insn.atomicity is None:
        lhs_atomicity = None
    else:
        atomicity_entries = [
            a for a in insn.atomicity
            if a.var_name == assignee_var_name]
        assert len(atomicity_entries) <= 1
        lhs_atomicity = atomicity_entries[0] if atomicity_entries else None

    from loopy.kernel.data import AtomicInit, AtomicUpdate

    lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)

    if lhs_atomicity is None:
        result = codegen_state.ast_builder.emit_assignment(
            codegen_state,
            lhs_code,
            ecm(insn.expression, prec=PREC_NONE,
                type_context=rhs_type_context,
                needed_dtype=lhs_dtype))
    elif isinstance(lhs_atomicity, AtomicInit):
        raise NotImplementedError("atomic init")
    elif isinstance(lhs_atomicity, AtomicUpdate):
        codegen_state.seen_atomic_dtypes.add(lhs_dtype)
        result = codegen_state.ast_builder.generate_atomic_update(
            kernel, codegen_state, lhs_atomicity, lhs_var,
            insn.assignee, insn.expression,
            lhs_dtype, rhs_type_context)
    else:
        raise ValueError("unexpected lhs atomicity type: %s"
                % type(lhs_atomicity).__name__)

    # {{{ tracing

    if kernel.options.trace_assignments or kernel.options.trace_assignment_values:
        if codegen_state.vectorization_info and is_vector:
            raise Unvectorizable("tracing does not support vectorization")

        from cgen import Statement as S  # noqa

        gs, ls = kernel.get_grid_size_upper_bounds()
        group_axes = range(len(gs))
        local_axes = range(len(ls))

        format_parts = [
            "%s.%s[%s][%s]: %s" % (
                kernel.name,
                insn.id,
                ", ".join("gid%d=%%d" % i for i in group_axes),
                ", ".join("lid%d=%%d" % i for i in local_axes),
                assignee_var_name)]
        printf_args = (
            ["gid(%d)" % i for i in group_axes]
            + ["lid(%d)" % i for i in local_axes])

        if assignee_indices:
            format_parts.append(
                "[%s]" % ",".join(len(assignee_indices) * ["%d"]))
            for index in assignee_indices:
                printf_args.append(
                    ecm(index, prec=PREC_NONE, type_context="i"))

        if kernel.options.trace_assignment_values:
            value_kind = lhs_dtype.numpy_dtype.kind
            if value_kind == "i":
                format_parts.append(" = %d")
                printf_args.append(lhs_code)
            elif value_kind == "f":
                format_parts.append(" = %g")
                printf_args.append(lhs_code)
            elif value_kind == "c":
                format_parts.append(" = %g + %gj")
                printf_args.extend([
                    "(%s).x" % lhs_code,
                    "(%s).y" % lhs_code])

        printf_format = "".join(format_parts)

        if printf_args:
            printf_args_str = ", " + ", ".join(printf_args)
        else:
            printf_args_str = ""

        printf_insn = S("printf(\"%s\\n\"%s)" % (
            printf_format, printf_args_str))

        from cgen import Block
        if kernel.options.trace_assignment_values:
            statements = [result, printf_insn]
        else:
            # print first, execute later -> helps find segfaults
            statements = [printf_insn, result]
        result = Block(statements)

    # }}}

    return result
def generate_expr_instruction_code(kernel, insn, codegen_state):
    """Generate a :mod:`cgen` assignment for an expression instruction.

    The right-hand side is emitted in the type context of the assigned
    variable's dtype. If assignment tracing is enabled in the kernel
    options, the assignment is wrapped in a :class:`cgen.Block` together
    with a ``printf`` statement.

    :arg kernel: the kernel being generated; its ``target``, ``options``,
        ``name``, ``get_var_descriptor`` and ``get_grid_sizes`` are used.
    :arg insn: the instruction; its ``assignee``, ``expression``, ``id`` and
        ``assignees_and_indices()`` are used.
    :arg codegen_state: the current code generation state; its
        ``expression_to_code_mapper`` and ``vectorization_info`` are used.
    :returns: a :class:`cgen.Assign`, or a :class:`cgen.Block` containing it
        and the tracing ``printf``.
    :raises Unvectorizable: if vectorization is in progress and the two sides
        of the assignment disagree on being vectors, or if a vector
        assignment should be traced.
    """
    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context, VectorizabilityChecker

    # Stays False unless vectorization is in progress, so the tracing code
    # below can test it without caring whether it was ever assigned.
    is_vector = False

    if codegen_state.vectorization_info:
        vinfo = codegen_state.vectorization_info
        vcheck = VectorizabilityChecker(kernel, vinfo.iname, vinfo.length)

        # LHS is the assignee, RHS is the expression being assigned.
        lhs_is_vector = vcheck(insn.assignee)
        rhs_is_vector = vcheck(insn.expression)

        if lhs_is_vector != rhs_is_vector:
            raise Unvectorizable(
                    "LHS and RHS disagree on whether they are vectors")

        is_vector = lhs_is_vector

    expr = insn.expression

    # Exactly one assignee is expected; the unpacking fails otherwise.
    (assignee_var_name, assignee_indices), = insn.assignees_and_indices()
    target_dtype = kernel.get_var_descriptor(assignee_var_name).dtype

    from cgen import Assign

    lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
    result = Assign(
            lhs_code,
            ecm(
                expr,
                prec=PREC_NONE,
                type_context=dtype_to_type_context(
                    kernel.target, target_dtype),
                needed_dtype=target_dtype))

    if kernel.options.trace_assignments or kernel.options.trace_assignment_values:
        if is_vector:
            raise Unvectorizable("tracing does not support vectorization")

        from cgen import Statement as S  # noqa

        gs, ls = kernel.get_grid_sizes()

        printf_format = "%s.%s[%s][%s]: %s" % (
                kernel.name,
                insn.id,
                ", ".join("gid%d=%%d" % i for i in range(len(gs))),
                ", ".join("lid%d=%%d" % i for i in range(len(ls))),
                assignee_var_name)

        printf_args = (
                ["gid(%d)" % i for i in range(len(gs))]
                + ["lid(%d)" % i for i in range(len(ls))])

        if assignee_indices:
            printf_format += "[%s]" % ",".join(len(assignee_indices) * ["%d"])
            printf_args.extend(
                    ecm(i, prec=PREC_NONE, type_context="i")
                    for i in assignee_indices)

        if kernel.options.trace_assignment_values:
            # Other dtype kinds are traced without their value.
            if target_dtype.kind == "i":
                printf_format += " = %d"
                printf_args.append(lhs_code)
            elif target_dtype.kind == "f":
                printf_format += " = %g"
                printf_args.append(lhs_code)
            elif target_dtype.kind == "c":
                printf_format += " = %g + %gj"
                printf_args.extend([
                    "(%s).x" % lhs_code,
                    "(%s).y" % lhs_code])

        if printf_args:
            printf_args_str = ", " + ", ".join(printf_args)
        else:
            printf_args_str = ""

        printf_insn = S('printf("%s\\n"%s)' % (printf_format, printf_args_str))

        from cgen import Block

        if kernel.options.trace_assignment_values:
            # the value can only be printed once it has been assigned
            result = Block([result, printf_insn])
        else:
            # print first, execute later -> helps find segfaults
            result = Block([printf_insn, result])

    return result
def map_call(self, expr, enclosing_prec, type_context):
    """Generate code for a function call that occurs inside an expression.

    ``indexof`` and ``indexof_vec`` are handled directly by computing the
    access information of their (subscript) argument. Every other function
    is resolved through the kernel's function manglers and recorded in
    ``self.codegen_state.seen_functions``.

    :arg expr: the call expression; its ``function`` and ``parameters``
        are used.
    :arg enclosing_prec: not used by this method.
    :arg type_context: not used by this method.
    :returns: for ``indexof``/``indexof_vec``, an index expression built
        from the access information; otherwise a string of the form
        ``"target_name(arg, ...)"``.
    :raises LoopyError: if ``indexof``/``indexof_vec`` are misused, or if the
        mangled function does not have exactly one return value.
    :raises RuntimeError: if no function mangler knows the function.
    """
    from pymbolic.primitives import Variable, Subscript
    from pymbolic.mapper.stringifier import PREC_NONE

    identifier = expr.function

    # {{{ implement indexof, indexof_vec

    # The callee is only converted to its name if it is a Variable (see
    # below), so do not rely on a 'name' attribute being present here.
    func_name = getattr(identifier, "name", None)

    if func_name in ("indexof", "indexof_vec"):
        if len(expr.parameters) != 1:
            raise LoopyError("%s takes exactly one argument" % func_name)
        arg, = expr.parameters
        if not isinstance(arg, Subscript):
            raise LoopyError(
                    "argument to %s must be a subscript" % func_name)

        ary = self.find_array(arg)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate
        access_info = get_access_info(
                self.kernel.target, ary, arg.index,
                lambda expr: evaluate(
                    expr, self.codegen_state.var_subst_map),
                self.codegen_state.vectorization_info)

        from loopy.kernel.data import ImageArg
        if isinstance(ary, ImageArg):
            raise LoopyError("%s does not support images" % func_name)

        if func_name == "indexof":
            return access_info.subscripts[0]

        # func_name == "indexof_vec" from here on
        from loopy.kernel.array import VectorArrayDimTag

        # If several axes carry a vector tag, the last one is used.
        ivec = None
        for iaxis, dim_tag in enumerate(ary.dim_tags):
            if isinstance(dim_tag, VectorArrayDimTag):
                ivec = iaxis

        if ivec is None:
            return access_info.subscripts[0]

        return (
                access_info.subscripts[0] * ary.shape[ivec]
                + access_info.vector_index)

    # }}}

    if isinstance(identifier, Variable):
        identifier = identifier.name

    par_dtypes = tuple(self.infer_type(par) for par in expr.parameters)

    mangle_result = self.kernel.mangle_function(
            identifier, par_dtypes,
            ast_builder=self.codegen_state.ast_builder)

    if mangle_result is None:
        raise RuntimeError(
                "function '%s' unknown--"
                "maybe you need to register a function mangler?"
                % identifier)

    if len(mangle_result.result_dtypes) != 1:
        raise LoopyError(
                "functions with more or fewer than one return value "
                "may not be used in an expression")

    if mangle_result.arg_dtypes is not None:
        # The mangler declared the argument types: emit every parameter
        # in the context of (and converted to) its declared type.
        str_parameters = [
                self.rec(par, PREC_NONE,
                    dtype_to_type_context(self.kernel.target, tgt_dtype),
                    tgt_dtype)
                for par, _par_dtype, tgt_dtype in zip(
                    expr.parameters, par_dtypes, mangle_result.arg_dtypes)]

    else:
        # /!\ FIXME For some functions (e.g. 'sin'), it makes sense to
        # propagate the type context here. But for many others, it does
        # not. Using the inferred type as a stopgap for now.
        str_parameters = [
                self.rec(par, PREC_NONE,
                    type_context=dtype_to_type_context(
                        self.kernel.target, par_dtype))
                for par, par_dtype in zip(expr.parameters, par_dtypes)]

        from warnings import warn
        warn(
                "Calling function '%s' with unknown C signature--"
                "return CallMangleInfo.arg_dtypes" % identifier,
                LoopyWarning)

    from loopy.codegen import SeenFunction
    self.codegen_state.seen_functions.add(
            SeenFunction(
                identifier,
                mangle_result.target_name,
                mangle_result.arg_dtypes or par_dtypes))

    return "%s(%s)" % (mangle_result.target_name, ", ".join(str_parameters))
def emit_call_insn(self, insn, target, expression_to_code_mapper):
    """
    :arg insn: An instance of :class:`loopy.kernel.instructions.CallInstruction`.
    :arg target: An instance of :class:`loopy.target.TargetBase`.
    :arg expression_to_code_mapper: An instance of :class:`IdentityMapper`
        responsible for code mapping from :mod:`loopy` syntax to the
        **target syntax**.

    :returns: A tuple of the call to be generated and an instance of
        :class:`bool` whether the first assignee is a part of the LHS in
        the assignment instruction.

    :raises NotImplementedError: if *target* is not a C-family target.
    :raises LoopyError: if the inferred type of one of the assignees passed
        by reference differs from the type recorded for it in
        ``self.arg_id_to_dtype``.

    .. note::

        The default implementation returns the first assignees and the
        references of the rest of the assignees are appended to the
        arguments of the call.

        *Example:* ``c, d = f(a, b)`` is returned as ``c = f(a, b, &d)``
    """
    from loopy.target.c import CFamilyTarget
    if not isinstance(target, CFamilyTarget):
        raise NotImplementedError()

    from loopy.kernel.instruction import CallInstruction
    from loopy.expression import dtype_to_type_context
    from pymbolic.mapper.stringifier import PREC_NONE
    from pymbolic import var

    assert isinstance(insn, CallInstruction)
    assert self.is_ready_for_codegen()

    ecm = expression_to_code_mapper
    parameters = insn.expression.parameters

    # The first assignee is not passed to the call; the remaining ones are
    # appended to the argument list by reference.
    assignees = insn.assignees[1:]

    # Only the declared argument types drive the code generated below.
    # NOTE(review): the inferred parameter types are otherwise unused;
    # presumably inferring them is kept so that inference failures surface
    # here -- confirm before removing.
    par_dtypes = tuple(ecm.infer_type(par) for par in parameters)

    # Arguments are looked up under the ids 0, 1, ...; the assignees passed
    # by reference under the ids -2, -3, ...
    arg_dtypes = tuple(
            self.arg_id_to_dtype[i] for i, _ in enumerate(parameters))
    assignee_dtypes = tuple(
            self.arg_id_to_dtype[-i - 2] for i, _ in enumerate(assignees))

    tgt_parameters = [
            ecm(par, PREC_NONE,
                dtype_to_type_context(target, tgt_dtype),
                tgt_dtype).expr
            for par, _par_dtype, tgt_dtype in zip(
                parameters, par_dtypes, arg_dtypes)]

    for a, tgt_dtype in zip(assignees, assignee_dtypes):
        inferred_dtype = ecm.infer_type(a)
        if tgt_dtype != inferred_dtype:
            raise LoopyError(
                    "Type Mismatch in function %s. Expected: %s, Got: %s"
                    % (self.name, tgt_dtype, inferred_dtype))

        # "&" is used as a pseudo-function to take the assignee's address.
        tgt_parameters.append(
                var("&")(
                    ecm(a, PREC_NONE,
                        dtype_to_type_context(target, tgt_dtype),
                        tgt_dtype).expr))

    # assignee is returned whenever the size of assignees is non zero.
    first_assignee_is_returned = len(insn.assignees) > 0

    return (
            var(self.name_in_target)(*tgt_parameters),
            first_assignee_is_returned)
def emit_multiple_assignment(self, codegen_state, insn):
    """Emit a call instruction that may assign to several left-hand sides.

    The callee is resolved through the kernel's function manglers. The
    first assignee receives the (type-cast) return value of the call; all
    further assignees are appended to the argument list by reference.

    :arg codegen_state: the current code generation state; its ``kernel``,
        ``expression_to_code_mapper`` and ``seen_functions`` are used.
    :arg insn: the call instruction; its ``expression``, ``assignees``,
        ``assignee_var_names()`` and ``id`` are used.
    :returns: the result of ``self.emit_tuple_assignment`` for the
        ``loopy_make_tuple`` pseudo-function, a
        :class:`cgen.ExpressionStatement` if the callee has no results, and
        a :class:`cgen.Assign` otherwise.
    :raises RuntimeError: if no function mangler knows the function.
    :raises LoopyError: if the inferred type of an assignee passed by
        reference differs from the corresponding result type of the callee.
    """
    ecm = codegen_state.expression_to_code_mapper

    from pymbolic.primitives import Variable
    from pymbolic.mapper.stringifier import PREC_NONE
    from pymbolic import var

    func_id = insn.expression.function
    parameters = insn.expression.parameters

    if isinstance(func_id, Variable):
        func_id = func_id.name

    assignee_var_descriptors = [
            codegen_state.kernel.get_var_descriptor(a)
            for a in insn.assignee_var_names()]

    par_dtypes = tuple(ecm.infer_type(par) for par in parameters)

    mangle_result = codegen_state.kernel.mangle_function(func_id, par_dtypes)
    if mangle_result is None:
        raise RuntimeError("function '%s' unknown--"
                "maybe you need to register a function mangler?"
                % func_id)

    assert mangle_result.arg_dtypes is not None

    if mangle_result.target_name == "loopy_make_tuple":
        # This shortcut avoids actually having to emit a 'make_tuple' function.
        return self.emit_tuple_assignment(codegen_state, insn)

    from loopy.expression import dtype_to_type_context
    c_parameters = [
            ecm(par, PREC_NONE,
                dtype_to_type_context(self.target, tgt_dtype),
                tgt_dtype).expr
            for par, _par_dtype, tgt_dtype in zip(
                parameters, par_dtypes, mangle_result.arg_dtypes)]

    from loopy.codegen import SeenFunction
    codegen_state.seen_functions.add(
            SeenFunction(func_id,
                mangle_result.target_name,
                mangle_result.arg_dtypes))

    # The first assignee takes the return value, so the assignees handled
    # here start at (1-based) left-hand side number 2.
    for lhs_number, (a, tgt_dtype) in enumerate(
            zip(insn.assignees[1:], mangle_result.result_dtypes[1:]),
            start=2):
        if tgt_dtype != ecm.infer_type(a):
            raise LoopyError("type mismatch in %d'th (1-based) left-hand "
                    "side of instruction '%s'" % (lhs_number, insn.id))
        c_parameters.append(
                # TODO Yuck: The "where-at function": &(...)
                var("&")(
                    ecm(a, PREC_NONE,
                        dtype_to_type_context(self.target, tgt_dtype),
                        tgt_dtype).expr))

    result = var(mangle_result.target_name)(*c_parameters)

    # In case of no assignees, we are done
    if len(mangle_result.result_dtypes) == 0:
        from cgen import ExpressionStatement
        return ExpressionStatement(
                CExpression(self.get_c_expression_to_code_mapper(), result))

    # Convert the call's return type to the type of the variable it is
    # assigned to.
    result = ecm.wrap_in_typecast(
            mangle_result.result_dtypes[0],
            assignee_var_descriptors[0].dtype,
            result)

    lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None)

    from cgen import Assign
    return Assign(
            lhs_code,
            CExpression(self.get_c_expression_to_code_mapper(), result))
def emit_assignment(self, codegen_state, insn):
    """Emit an assignment, or a streaming store if the instruction asks for it.

    Instructions tagged ``"!streaming_store"`` are emitted as a
    ``streaming_store(...)`` statement. This requires the flattened
    subscript to contain the iname tagged as local axis 0 exactly once as a
    bare term, and in no other term. All other instructions become a plain
    :class:`cgen.Assign`.

    :arg codegen_state: the current code generation state; its ``kernel``,
        ``expression_to_code_mapper``, ``var_subst_map`` and
        ``vectorization_info`` are used.
    :arg insn: the assignment instruction; its ``assignee``, ``expression``,
        ``atomicity``, ``tags`` and ``assignee_var_names()`` are used.
    :returns: a :class:`cgen.Statement` for streaming stores, a
        :class:`cgen.Assign` otherwise.
    :raises NotImplementedError: if the instruction is atomic.
    :raises LoopyError: if a streaming store targets an unsupported array
        type, does not have exactly one subscript, does not have stride 1
        in the local-axis-0 index, or uses a short-vector data type.
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    assignee_var_name, = insn.assignee_var_names()

    lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    if insn.atomicity:
        raise NotImplementedError("atomic ops in ISPC")

    from loopy.expression import dtype_to_type_context
    from pymbolic.mapper.stringifier import PREC_NONE

    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
    rhs_code = ecm(insn.expression, prec=PREC_NONE,
                type_context=rhs_type_context,
                needed_dtype=lhs_dtype)

    lhs = insn.assignee

    # {{{ handle streaming stores

    if "!streaming_store" in insn.tags:
        ary = ecm.find_array(lhs)

        from loopy.kernel.array import get_access_info
        from pymbolic import evaluate

        from loopy.symbolic import simplify_using_aff
        index_tuple = tuple(
                simplify_using_aff(kernel, idx) for idx in lhs.index_tuple)

        # Use the code generation state handed to this method, like every
        # other access in this function does.
        access_info = get_access_info(
                kernel.target, ary, index_tuple,
                lambda expr: evaluate(expr, codegen_state.var_subst_map),
                codegen_state.vectorization_info)

        from loopy.kernel.data import GlobalArg, TemporaryVariable

        if not isinstance(ary, (GlobalArg, TemporaryVariable)):
            raise LoopyError("array type not supported in ISPC: %s"
                    % type(ary).__name__)

        if len(access_info.subscripts) != 1:
            raise LoopyError("streaming stores must have a subscript")
        subscript, = access_info.subscripts

        from pymbolic.primitives import Sum, flattened_sum, Variable
        if isinstance(subscript, Sum):
            terms = subscript.children
        else:
            # a subscript that is not a sum is its own (single) term
            terms = (subscript,)

        from loopy.kernel.data import LocalIndexTag
        from loopy.symbolic import get_dependencies

        def is_local_axis_0(iname):
            # True if *iname* is tagged as the local index of axis 0.
            tag = kernel.iname_to_tag.get(iname)
            return isinstance(tag, LocalIndexTag) and tag.axis == 0

        # Drop the bare local-axis-0 iname from the subscript and make sure
        # it does not occur anywhere else in it.
        new_terms = []
        saw_l0 = False
        for term in terms:
            if isinstance(term, Variable) and is_local_axis_0(term.name):
                if saw_l0:
                    raise LoopyError("streaming store must have stride 1 "
                            "in local index, got: %s" % subscript)
                saw_l0 = True
                continue

            for dep in get_dependencies(term):
                if is_local_axis_0(dep):
                    raise LoopyError(
                            "streaming store must have stride 1 "
                            "in local index, got: %s" % subscript)

            new_terms.append(term)

        if not saw_l0:
            raise LoopyError("streaming store must have stride 1 in "
                    "local index, got: %s" % subscript)

        if access_info.vector_index is not None:
            raise LoopyError("streaming store may not use a short-vector "
                    "data type")

        # A right-hand side that does not depend on the local-axis-0 iname
        # is wrapped in broadcast().
        rhs_has_programindex = any(
                is_local_axis_0(dep)
                for dep in get_dependencies(insn.expression))

        if not rhs_has_programindex:
            rhs_code = "broadcast(%s, 0)" % rhs_code

        from cgen import Statement
        return Statement(
                "streaming_store(%s + %s, %s)"
                % (
                    access_info.array_name,
                    ecm(flattened_sum(new_terms), PREC_NONE, 'i'),
                    rhs_code))

    # }}}

    from cgen import Assign
    return Assign(ecm(lhs, prec=PREC_NONE, type_context=None), rhs_code)