def generate_function_signature(self, function_descriptor):
    """Builds the C signature for a :class:`FunctionDescriptor`

    Parameters are emitted in a fixed order: matrix parameters (pointers
    named ``<name>_vec``), then value parameters, then struct parameters
    (pointers to ``struct <stype>``). When ``self.mic_flag`` is set, value
    parameters are passed as ``<name>_pointer`` pointers and the return type
    is prefixed with ``self._pymic_attribute``; otherwise they are passed by
    value and the declaration is wrapped in ``extern "C"``.

    :param function_descriptor: The :class:`FunctionDescriptor` to process
    :returns: :class:`cgen.FunctionDeclaration` when ``self.mic_flag`` is
              set, otherwise that declaration wrapped in
              :class:`cgen.Extern`
    """
    # Matrix arguments always travel as raw pointers.
    signature_args = [
        cgen.Pointer(cgen.POD(matrix['dtype'], matrix['name'] + "_vec"))
        for matrix in function_descriptor.matrix_params
    ]

    # Scalars: by pointer on the MIC path, by value everywhere else.
    if self.mic_flag:
        for scalar in function_descriptor.value_params:
            signature_args.append(
                cgen.Pointer(
                    cgen.POD(scalar['dtype'], scalar['name'] + "_pointer")))
    else:
        for scalar in function_descriptor.value_params:
            signature_args.append(cgen.POD(scalar['dtype'], scalar['name']))

    # Structs are passed by pointer to avoid copying them.
    signature_args.extend(
        cgen.Pointer(
            cgen.Value("struct %s" % (record['stype']), record['name']))
        for record in function_descriptor.struct_params
    )

    if not self.mic_flag:
        plain_decl = cgen.FunctionDeclaration(
            cgen.Value('int', function_descriptor.name), signature_args)
        return cgen.Extern("C", plain_decl)

    return cgen.FunctionDeclaration(
        cgen.Value(self._pymic_attribute + '\nint', function_descriptor.name),
        signature_args)
def sympy_to_cgen(self, stencils):
    """Converts sympy stencils to cgen statements

    First emits one assignment per entry of ``self.factorized`` (registering
    each name as a local variable), then one statement per stencil. The first
    assignment to every left-hand side whose name contains ``"temp"`` is
    turned into a typed declaration-with-initialiser.

    :param stencils: A list of stencils to be converted
    :returns: :class:`cgen.Block` containing the converted kernel
    """
    factors = []
    if self.factorized:
        for name, expr in self.factorized.items():
            self.add_local_var(name, self.dtype)
            sub = str(
                ccode(self.time_substitutions(expr).xreplace(self._mapper)))
            if self.dtype is np.float32:
                # Single precision kernels call the float variants of the
                # math functions.
                sub = sub.replace("pow", "powf").replace("fabs", "fabsf")
            factors.append(cgen.Assign(name, sub))

    # Remember *which* statement first assigns each temporary, so that the
    # declaration is attached to that statement and not merely to the n-th
    # statement of the kernel.
    declarations = []  # (index into stencils, cgen.Value)
    declared = set()
    for idx, eqn in enumerate(stencils):
        s_lhs = str(eqn.lhs)
        if "temp" in s_lhs and s_lhs not in declared:
            declared.add(s_lhs)
            expr_dtype = dse_dtype(eqn.rhs) or self.dtype
            declarations.append(
                (idx,
                 cgen.Value(cgen.dtype_to_ctype(expr_dtype), ccode(eqn.lhs))))

    stmts = [self.convert_equality_to_cgen(x) for x in stencils]

    for idx, dec in declarations:
        stmts[idx] = cgen.Assign(dec.inline(), stmts[idx].rvalue)

    return cgen.Block(factors + stmts)
def generate_space_loops(self, loop_body):
    """Wrap a statement in one ``for`` loop per space dimension (no blocking)

    Dimensions are visited from last to first, so the last entry of
    ``self.space_dims`` becomes the innermost loop and the first entry the
    outermost one. Every loop is passed through
    ``self.add_inner_most_dim_pragma``, with the flag set only for the
    innermost loop.

    :param loop_body: Statement representing the loop body
    :returns: a single-element list holding the outermost loop (or
              ``loop_body`` itself when there are no space dimensions)
    """
    nest = loop_body
    for depth, space_dim in enumerate(reversed(list(self.space_dims))):
        index = self._mapper[space_dim]
        limits = self._space_loop_limits[space_dim]

        init = cgen.InlineInitializer(cgen.Value("int", index),
                                      str(limits[0]))
        condition = str(index) + "<" + str(limits[1])
        increment = str(index) + "++"

        nest = cgen.For(init, condition, increment, nest)
        # depth 0 is the first loop built, i.e. the innermost one
        nest = self.add_inner_most_dim_pragma(depth == 0, self.space_dims,
                                              nest)

    # callers expect a list of statements
    return [nest]
def generate_space_loops_blocking(self, loop_body):
    """Generate list<cgen.For> for a cache blocking space loop

    The result consists of the blocked loop nest (outer loops stepping over
    blocks, inner loops iterating inside one block) followed by one
    remainder loop nest per blocked dimension, each preceded by the OpenMP
    ``for`` pragma when ``self.compiler.openmp`` is set. A dimension whose
    block size is ``None`` is not blocked.

    :param loop_body: Statement representing the loop body
    :returns: :list<cgen.For> a list of for loops
    """
    orig_loop_body = loop_body
    omp_for = [cgen.Pragma("omp for schedule(static)")
               ] if self.compiler.openmp else []

    # zip() yields a one-shot iterator on Python 3, which reversed() rejects
    # with a TypeError; materialise the pairs once and reuse them.
    dims_and_blocks = list(zip(list(self.space_dims), self.block_sizes))

    def blocked_end(limits, index_name):
        # End of the region covered by whole blocks: upper limit minus the
        # part of the extent that does not fill a complete block.
        return "%d - (%d %% %s)" % (limits[1], limits[1] - limits[0],
                                    index_name + "block")

    # Inner loops: iterate inside a block (or over the full range when the
    # dimension is not blocked).
    inner_most_dim = True
    for spc_var, block_size in reversed(dims_and_blocks):
        orig_var = str(self._mapper[spc_var])
        block_var = orig_var + "b"
        loop_limits = self._space_loop_limits[spc_var]

        if block_size is not None:
            lower_limit_str = block_var
            upper_limit_str = "%s+%sblock" % (block_var, orig_var)
        else:
            lower_limit_str = str(loop_limits[0])
            upper_limit_str = str(loop_limits[1])

        loop_body = cgen.For(
            cgen.InlineInitializer(cgen.Value("int", orig_var),
                                   lower_limit_str),
            orig_var + "<" + upper_limit_str, orig_var + "++", loop_body)
        loop_body = self.add_inner_most_dim_pragma(inner_most_dim,
                                                   self.space_dims, loop_body)
        inner_most_dim = False

    # Outer loops: step from block to block over every blocked dimension.
    remainder_counter = 0  # indicates how many remainder loops we need
    for spc_var, block_size in reversed(dims_and_blocks):
        # if block size set to None do not block this dimension
        if block_size is None:
            continue

        orig_var = str(self._mapper[spc_var])
        block_var = orig_var + "b"
        loop_limits = self._space_loop_limits[spc_var]

        loop_body = cgen.For(
            cgen.InlineInitializer(cgen.Value("int", block_var),
                                   str(loop_limits[0])),
            block_var + "<" + blocked_end(loop_limits, orig_var),
            block_var + "+=" + orig_var + "block", loop_body)
        remainder_counter += 1

    # Remainder loops: cover what the whole blocks left over.
    full_remainder = []
    # weights for deciding remainder loop limit
    weights = self._decide_weights(self.block_sizes, remainder_counter)
    for _ in range(remainder_counter):
        remainder_loop = orig_loop_body
        inner_most_dim = True

        for spc_var, block_size in reversed(dims_and_blocks):
            orig_var = str(self._mapper[spc_var])
            loop_limits = self._space_loop_limits[spc_var]
            # Full loop limits unless overridden below
            lower_limit_str = str(loop_limits[0])
            upper_limit_str = str(loop_limits[1])

            if block_size is not None:
                if weights[orig_var] < 0:
                    # already blocked loop limits
                    upper_limit_str = blocked_end(loop_limits, orig_var)
                elif weights[orig_var] == 0:
                    # remainder loop limits
                    lower_limit_str = blocked_end(loop_limits, orig_var)
                # the weights advance once per remainder nest, so each
                # nest selects different limits for this dimension
                weights[orig_var] += 1

            remainder_loop = cgen.For(
                cgen.InlineInitializer(cgen.Value("int", orig_var),
                                       lower_limit_str),
                orig_var + "<" + upper_limit_str, orig_var + "++",
                remainder_loop)
            remainder_loop = self.add_inner_most_dim_pragma(
                inner_most_dim, self.space_dims, remainder_loop)
            inner_most_dim = False

        full_remainder += omp_for
        full_remainder.append(remainder_loop)

    return [loop_body] + full_remainder
def generate_loops(self, loop_body):
    """Build the full time loop around the (optionally blocked) space loops

    The space loops are generated with the last space dimension innermost,
    on the assumption that it is the fastest varying dimension in memory.
    Inside the time loop the statements are laid out as: time stepping
    updates (unless the time history is saved) and pre-stencils, the space
    loops, then the post-stencils. The time stepping variables are declared
    ahead of the time loop.

    :param loop_body: Statement representing the loop body
    :returns: :class:`cgen.Block` representing the loop
    """
    # Space loops (skipped entirely for an empty block)
    body_is_empty_block = (isinstance(loop_body, cgen.Block)
                           and len(loop_body.contents) == 0)
    if body_is_empty_block:
        space_loops = []
    elif self.cache_blocking is not None:
        self._decide_block_sizes()
        space_loops = self.generate_space_loops_blocking(loop_body)
    else:
        space_loops = self.generate_space_loops(loop_body)

    def pragma(directive):
        # OpenMP pragmas are only emitted when the compiler supports them
        return [cgen.Pragma(directive)] if self.compiler.openmp else []

    omp_master = pragma("omp master")
    omp_single = pragma("omp single")
    omp_parallel = pragma("omp parallel")
    omp_for = pragma("omp for schedule(static)")

    t_loop_limits = self.time_loop_limits
    t_var = str(self._mapper[self.time_dim])
    cond_op = "<" if self._forward else ">"

    # To cycle between array elements when we are not saving time history
    time_stepping = [] if self.save is True else self.get_time_stepping()

    if space_loops:
        space_loops = [cgen.Block(omp_for + space_loops)]

    # NOTE(review): the values returned by time_substitutions are not used
    # below; the calls are kept in case they validate or update their
    # argument -- confirm before removing.
    # Statements to be inserted into the time loop before the spatial loop
    for stencil in self.time_loop_stencils_b:
        self.time_substitutions(stencil)
    pre_stencils = [
        self.convert_equality_to_cgen(stencil)
        for stencil in self.time_loop_stencils_b
    ]

    # Statements to be inserted into the time loop after the spatial loop
    for stencil in self.time_loop_stencils_a:
        self.time_substitutions(stencil)
    post_stencils = [
        self.convert_equality_to_cgen(stencil)
        for stencil in self.time_loop_stencils_a
    ]

    if self.profile:
        # Each statement gets its own profiling section, numbered by
        # position.
        pre_stencils = list(
            flatten([
                self.profiler.add_profiling([stmt], "%s%d" %
                                            (PRE_STENCILS.name, pos))
                for pos, stmt in enumerate(pre_stencils)
            ]))
        post_stencils = list(
            flatten([
                self.profiler.add_profiling([stmt], "%s%d" %
                                            (POST_STENCILS.name, pos))
                for pos, stmt in enumerate(post_stencils)
            ]))

    initial_block = time_stepping + pre_stencils
    if initial_block:
        initial_block = omp_single + [cgen.Block(initial_block)]

    end_block = post_stencils
    if end_block:
        end_block = omp_single + [cgen.Block(end_block)]

    if self.profile:
        space_loops = self.profiler.add_profiling(space_loops,
                                                  LOOP_BODY.name,
                                                  omp_flag=omp_master)

    time_loop = cgen.For(
        cgen.InlineInitializer(cgen.Value("int", t_var),
                               str(t_loop_limits[0])),
        t_var + cond_op + str(t_loop_limits[1]),
        t_var + "+=" + str(self._time_step),
        cgen.Block(initial_block + space_loops + end_block))

    # Code to declare the time stepping variables (outside the time loop)
    def_time_step = [
        cgen.Value("int", stepper.name) for stepper in self.time_steppers
    ]

    return cgen.Block(def_time_step + omp_parallel + [time_loop])