def convert_equality_to_cgen(self, equality):
    """Turn one equality into a :class:`cgen.Generable` statement

    :param equality: The statement to convert. A :class:`cgen.Generable`
                     is handed back untouched, an :class:`Iteration` has
                     ``self._mapper`` substituted into it and its ``ccode``
                     returned, and anything else is treated as an equality
                     exposing ``lhs`` and ``rhs``
    :returns: The resulting :class:`cgen.Generable` statement
    """
    # Already generated code: nothing left to convert.
    if isinstance(equality, cgen.Generable):
        return equality

    # Iterations produce their own code once the mapper has been applied.
    if isinstance(equality, Iteration):
        equality.substitute(self._mapper)
        return equality.ccode

    lhs_expr = self.time_substitutions(equality.lhs).xreplace(self._mapper)
    lhs_code = ccode(lhs_expr)

    rhs_expr = self.time_substitutions(equality.rhs).xreplace(self._mapper)
    # Keep the substituted stencil, which is used to determine the
    # alignment pragma.
    self.sub_stencils.append(rhs_expr)
    rhs_code = ccode(rhs_expr)

    if self.dtype is np.float32:
        # Use the single-precision spellings of the C math functions.
        rhs_code = str(rhs_code).replace("pow", "powf")
        rhs_code = rhs_code.replace("fabs", "fabsf")

    return cgen.Assign(lhs_code, rhs_code)
def get_time_stepping(self):
    """Build the assignments that advance the time stepper variables

    One stepper (the first one visited) is set to the time index modulo
    ``time_order + 1``; every other stepper is set to its neighbouring
    stepper plus one, using the same modulus. The visiting order and the
    neighbour direction depend on ``self._forward``.

    :returns: A list of :class:`cgen.Assign` statements, in visiting order
    """
    ti = self._mapper[self.time_dim]
    modulus = self.time_order + 1

    if self._forward is not True:
        # Backward propagation: walk the steppers from last to first and
        # derive each one from its higher-indexed neighbour.
        indices = reversed(range(modulus))
        anchor = self.time_order
        neighbour = 1
    else:
        indices = range(modulus)
        anchor = 0
        neighbour = -1

    body = []
    for i in indices:
        target = self.time_steppers[i].name
        if i == anchor:
            value = ccode(ti % modulus)
        else:
            value = ccode((self.time_steppers[i + neighbour] + 1) % modulus)
        body.append(cgen.Assign(target, value))
    return body
def sympy_to_cgen(self, stencils):
    """Converts sympy stencils to cgen statements

    Every entry of ``self.factorized`` is registered as a local variable
    and emitted as an assignment ahead of the kernel. Stencils whose
    left-hand side contains ``"temp"`` are emitted as a declaration with
    initialiser the first time that left-hand side is seen; all other
    stencils become plain assignments.

    :param stencils: A list of stencils to be converted
    :returns: :class:`cgen.Block` containing the converted kernel
    """
    factors = []
    for name, expr in self.factorized.items():
        self.add_local_var(name, self.dtype)
        sub = str(ccode(self.time_substitutions(expr).xreplace(self._mapper)))
        if self.dtype is np.float32:
            # Use the single-precision spellings of the C math functions.
            sub = sub.replace("pow", "powf").replace("fabs", "fabsf")
        factors.append(cgen.Assign(name, sub))

    # (stencil index, declaration) for the first assignment to each
    # temporary. Keeping the index ties each declaration to the statement
    # it came from instead of assuming temporaries are the leading stencils.
    decl = []
    declared = set()
    for idx, eqn in enumerate(stencils):
        s_lhs = str(eqn.lhs)
        if "temp" in s_lhs and s_lhs not in declared:
            declared.add(s_lhs)
            expr_dtype = dse_dtype(eqn.rhs) or self.dtype
            decl.append(
                (idx,
                 cgen.Value(cgen.dtype_to_ctype(expr_dtype), ccode(eqn.lhs))))

    stmts = [self.convert_equality_to_cgen(x) for x in stencils]

    # Fold each declaration into its statement: ``<type> temp = <rvalue>``.
    for idx, dec in decl:
        stmts[idx] = cgen.Assign(dec.inline(), stmts[idx].rvalue)

    return cgen.Block(factors + stmts)
def test_2d(self):
    """Run an ``input + 3`` kernel on a (3, 2) grid and check the last cell"""
    grid = np.arange(6, dtype=np.float64).reshape((3, 2))
    add_three = cgen.Assign("output_grid[i2][i1]", "input_grid[i2][i1] + 3")

    propagator = Propagator("process", 3, (2, ), [])
    # Input is registered before output, matching the call order below.
    for param_name in ("input_grid", "output_grid"):
        propagator.add_param(param_name, grid.shape, grid.dtype)
    propagator.loop_body = add_three

    compiled = propagator.cfunction
    result = np.empty_like(grid)
    compiled(grid, result)

    # grid[2][1] is 5, so the kernel should have written 5 + 3.
    assert result[2][1] == 8
def test_4d(self):
    """Run an ``input + 3`` kernel on a (5, 4, 3, 2) grid and check the last cell"""
    add_three = cgen.Assign("output_grid[i4][i1][i2][i3]",
                            "input_grid[i4][i1][i2][i3] + 3")
    grid = np.arange(120, dtype=np.float64).reshape((5, 4, 3, 2))

    propagator = Propagator("process", 5, (4, 3, 2), [])
    # Input is registered before output, matching the call order below.
    for param_name in ("input_grid", "output_grid"):
        propagator.add_param(param_name, grid.shape, grid.dtype)
    propagator.loop_body = add_three

    compiled = propagator.cfunction
    result = np.empty_like(grid)
    compiled(grid, result)

    # grid[4][3][2][1] is 119, so the kernel should have written 119 + 3.
    assert result[4][3][2][1] == 122