def ast_matmul(self, F_a):
    """Return the AST of a PyOP2 kernel computing a matrix-vector product.

    The result is memoised in ``self.asts`` under the key
    ``(ndofs, cdim, kernel_name)``; a second request with the same key
    returns the stored AST without rebuilding it.

    :param F_a: Assembled firedrake.Function object for the RHS
    :returns: the ``ast.FunDecl`` of a ``static inline void`` kernel taking
        ``double* A``, ``double** B`` and ``double** C``.
    """
    fs = F_a.function_space()
    # The number of dofs on each element is /ndofs*cdim/
    ndofs = sum(fs.topological.dofs_per_entity)
    cdim = fs.dim
    name = 'mat_vec_mul_kernel_%s' % fs.name

    key = (ndofs, cdim, name)
    if key in self.asts:
        return self.asts[key]

    def c_entry():
        # A fresh node on every call so the two uses below never share
        # a single Symbol instance.
        return ast.Symbol('C', ('i/%d' % cdim, 'i%%%d' % cdim))

    # Innermost statement: C[i/cdim][i%cdim] += A[...] * B[j][k]
    a_entry = ast.Symbol('A', ('i',), ((ndofs*cdim, 'j*%d + k' % cdim),))
    b_entry = ast.Symbol('B', ('j', 'k'))
    accumulate = ast.Incr(c_entry(), ast.Prod(a_entry, b_entry))

    # Loop nest, built inside-out: k over cdim, j over ndofs, i over ndofs*cdim.
    k_loop = ast.c_for('k', cdim, accumulate).children[0]
    j_loop = ast.c_for('j', ndofs, k_loop).children[0]
    # The target entry is zeroed before the j/k loops accumulate into it.
    zero_c = ast.Assign(c_entry(), '0.0')
    i_loop = ast.c_for('i', ndofs*cdim, [zero_c, j_loop]).children[0]

    signature = [ast.Decl('double*', 'A'),
                 ast.Decl('double**', 'B'),
                 ast.Decl('double**', 'C')]
    fundecl = ast.FunDecl('void', name, signature, ast.Root([i_loop]),
                          ['static', 'inline'])

    # Track the AST for later fast retrieval
    self.asts[key] = fundecl
    return fundecl
def compile_c_kernel(expression, to_pts, to_element, fs, coords):
    """Produce a :class:`PyOP2.Kernel` from the c expression provided.

    The generated ``expression_kernel`` iterates
    ``to_element.space_dimension()`` times.  On each iteration it first
    computes the coordinate ``x`` as the product of the coordinate element's
    tabulation at ``to_pts`` with the coordinate values passed to the kernel,
    then assigns every entry of ``expression.code`` into the output array.

    :param expression: provides ``code`` (a sequence of C expression
        strings) and ``_user_args`` (``(name, arg)`` pairs; each ``arg``
        has ``shape`` and ``name``).
    :param to_pts: points at which the coordinate element is tabulated.
    :param to_element: target element; only ``space_dimension()`` is used.
    :param fs: target function space; only ``dim`` is used, to size the
        output array.
    :param coords: coordinate field; supplies the coordinate function space
        and is always the first returned coefficient.
    :returns: ``(kernel, False, coefficients)`` where ``coefficients`` is
        ``coords`` followed by one ``GlobalWrapper`` per user argument.
        NOTE(review): the meaning of the constant ``False`` is defined by
        the caller -- confirm there.
    :raises ValueError: if a user argument is named ``x``.
    """
    coords_space = coords.function_space()
    coords_element = coords_space.fiat_element

    names = {v[0] for v in expression._user_args}

    # Take the first table of the tabulation dict (presumably the only one
    # for derivative order 0 -- TODO confirm).  ``next(iter(...))`` is used
    # because dict views cannot be indexed on Python 3.
    tabulation = next(iter(coords_element.tabulate(0, to_pts).values()))

    # Produce C array notation of X.
    X_str = "{{" + "},\n{".join(",".join(map(str, row))
                               for row in tabulation.T) + "}}"

    # Kernel-local identifiers that must not clash with user argument names.
    A = utils.unique_name("A", names)
    X = utils.unique_name("X", names)
    x_ = utils.unique_name("x_", names)
    k = utils.unique_name("k", names)
    d = utils.unique_name("d", names)
    i_ = utils.unique_name("i", names)
    # x is a reserved name.
    x = "x"
    if "x" in names:
        raise ValueError("cannot use 'x' as a user-defined Expression variable")

    # One assignment per component of the expression; the offset pair
    # (ncode, i) is handed to ast.Symbol together with the index k.
    ncode = len(expression.code)
    ass_exp = [ast.Assign(ast.Symbol(A, (k,), ((ncode, i),)),
                          ast.FlatBlock("%s" % code))
               for i, code in enumerate(expression.code)]

    ndof = to_element.space_dimension()
    # FS will always either be a functionspace or
    # vectorfunctionspace, so just accessing dim here is safe
    # (we don't need to go through ufl_element.value_shape())
    nfdof = int(ndof * numpy.prod(fs.dim, dtype=int))

    # Substitutions for the C templates below.
    vals = {
        "X": X,
        "x": x,
        "x_": x_,
        "k": k,
        "d": d,
        "i": i_,
        "x_array": X_str,
        "dim": coords_space.dim,
        "xndof": coords_element.space_dimension(),
        "ndof": ndof,
    }

    # Declarations: tabulated coordinate basis, the point x, and pi.
    init = ast.FlatBlock((
        " const double %(X)s[%(ndof)d][%(xndof)d] = %(x_array)s;"
        " double %(x)s[%(dim)d];"
        " const double pi = 3.141592653589793; ") % vals)
    # x[d] = sum_i X[k][i] * x_[i][d] for every coordinate component d.
    block = ast.FlatBlock((
        " for (unsigned int %(d)s=0; %(d)s < %(dim)d; %(d)s++) {"
        " %(x)s[%(d)s] = 0;"
        " for (unsigned int %(i)s=0; %(i)s < %(xndof)d; %(i)s++) {"
        " %(x)s[%(d)s] += %(X)s[%(k)s][%(i)s] * %(x_)s[%(i)s][%(d)s];"
        " };"
        " }; ") % vals)
    loop = ast.c_for(k, "%(ndof)d" % vals,
                     ast.Block([block] + ass_exp, open_scope=True))

    user_args = []
    user_init = []
    for _, arg in expression._user_args:
        if arg.shape == (1, ):
            # Shape-(1,) arguments arrive as ``name_`` pointers and are
            # dereferenced into a const local carrying the user's name.
            user_args.append(ast.Decl("double *", "%s_" % arg.name))
            user_init.append(ast.FlatBlock("const double %s = *%s_;" %
                                           (arg.name, arg.name)))
        else:
            user_args.append(ast.Decl("double *", arg.name))

    kernel_code = ast.FunDecl("void", "expression_kernel",
                              [ast.Decl("double", ast.Symbol(A, (nfdof,))),
                               ast.Decl("double**", x_)] + user_args,
                              ast.Block(user_init + [init, loop],
                                        open_scope=False))

    coefficients = [coords]
    coefficients.extend(GlobalWrapper(arg) for _, arg in expression._user_args)

    return op2.Kernel(kernel_code, kernel_code.name), False, tuple(coefficients)
def compile_c_kernel(expression, to_pts, to_element, fs, coords):
    """Build a :class:`PyOP2.Kernel` that evaluates a C expression.

    The generated ``expression_kernel`` iterates
    ``to_element.space_dimension()`` times.  Each iteration computes the
    coordinate ``x`` from the coordinate element's tabulation at ``to_pts``
    and the flat coordinate array passed to the kernel, then assigns every
    entry of ``expression.code`` into the output array.

    :param expression: provides ``code`` (a sequence of C expression
        strings) and ``_user_args`` (``(name, arg)`` pairs; each ``arg``
        has ``shape`` and ``name``).
    :param to_pts: points at which the coordinate element is tabulated.
    :param to_element: target element; only ``space_dimension()`` is used.
    :param fs: target function space; only ``value_size`` is used, to size
        the output array.
    :param coords: coordinate field; supplies the coordinate function space
        and is always the first returned coefficient.
    :returns: ``(kernel, False, coefficients)`` where ``coefficients`` is
        ``coords`` followed by one ``GlobalWrapper`` per user argument.
    :raises ValueError: if a user argument is named ``x``.
    """
    coords_space = coords.function_space()
    coords_element = create_element(coords_space.ufl_element(),
                                    vector_is_mixed=False)

    names = {entry[0] for entry in expression._user_args}

    tables = coords_element.tabulate(0, to_pts)
    basis_table = list(tables.values())[0]

    # Produce C array notation of X.
    rows = [",".join(str(value) for value in row) for row in basis_table.T]
    X_str = "{{" + "},\n{".join(rows) + "}}"

    # Kernel-local identifiers that must not clash with user argument names.
    A = utils.unique_name("A", names)
    X = utils.unique_name("X", names)
    x_ = utils.unique_name("x_", names)
    k = utils.unique_name("k", names)
    d = utils.unique_name("d", names)
    i_ = utils.unique_name("i", names)
    # x is a reserved name.
    x = "x"
    if "x" in names:
        raise ValueError(
            "cannot use 'x' as a user-defined Expression variable")

    # One assignment per component of the expression.
    ncode = len(expression.code)
    ass_exp = []
    for component, code in enumerate(expression.code):
        target = ast.Symbol(A, (k, ), ((ncode, component), ))
        ass_exp.append(ast.Assign(target, ast.FlatBlock("%s" % code)))

    dim = coords_space.value_size
    ndof = to_element.space_dimension()
    xndof = coords_element.space_dimension()
    nfdof = ndof * numpy.prod(fs.value_size, dtype=int)

    # Declarations: tabulated coordinate basis, the point x, and pi.
    init = ast.Block([
        ast.Decl(typ="double", sym=ast.Symbol(X, rank=(ndof, xndof)),
                 qualifiers=["const"], init=X_str),
        ast.Decl(typ="double", sym=ast.Symbol(x, rank=(dim, ))),
        ast.Decl(typ="double", sym="pi", qualifiers=["const"],
                 init="3.141592653589793"),
    ])

    # x[d] = 0; then x[d] += X[k][i] * x_[i*dim + d] for every i.
    flat_index = ast.Sum(ast.Prod(i_, dim), d)
    incr_x = ast.Incr(ast.Symbol(x, rank=(d, )),
                      ast.Prod(ast.Symbol(X, rank=(k, i_)),
                               ast.Symbol(x_, rank=(flat_index, ))))
    assign_x = ast.Assign(ast.Symbol(x, rank=(d, )), 0)
    loop_x = ast.For(init=ast.Decl("unsigned int", i_, 0),
                     cond=ast.Less(i_, xndof),
                     incr=ast.Incr(i_, 1),
                     body=[incr_x])
    block = ast.For(init=ast.Decl("unsigned int", d, 0),
                    cond=ast.Less(d, dim),
                    incr=ast.Incr(d, 1),
                    body=[assign_x, loop_x])
    loop = ast.c_for(k, ndof, ast.Block([block] + ass_exp, open_scope=True))

    user_args = []
    user_init = []
    for _, arg in expression._user_args:
        if arg.shape != (1, ):
            user_args.append(ast.Decl("double *", arg.name))
            continue
        # Shape-(1,) arguments arrive as ``name_`` pointers and are
        # dereferenced into a const local carrying the user's name.
        user_args.append(ast.Decl("double *", "%s_" % arg.name))
        user_init.append(
            ast.FlatBlock("const double %s = *%s_;" % (arg.name, arg.name)))

    kernel_args = [ast.Decl("double", ast.Symbol(A, (nfdof, ))),
                   ast.Decl("double*", x_)]
    kernel_code = ast.FunDecl(
        "void", "expression_kernel", kernel_args + user_args,
        ast.Block(user_init + [init, loop], open_scope=False))

    coefficients = [coords] + [GlobalWrapper(arg)
                               for _, arg in expression._user_args]

    return op2.Kernel(kernel_code, kernel_code.name), False, tuple(coefficients)
def compile_c_kernel(expression, to_pts, to_element, fs, coords):
    """Produce a :class:`PyOP2.Kernel` from the c expression provided.

    The generated ``expression_kernel`` iterates
    ``to_element.space_dimension()`` times.  Each iteration computes the
    coordinate ``x`` from the coordinate element's tabulation at ``to_pts``
    and the ``double**`` coordinate values passed to the kernel, then
    assigns every entry of ``expression.code`` into the output array.

    :param expression: provides ``code`` (a sequence of C expression
        strings) and ``_user_args`` (``(name, arg)`` pairs; each ``arg``
        has ``shape`` and ``name``).
    :param to_pts: points at which the coordinate element is tabulated.
    :param to_element: target element; only ``space_dimension()`` is used.
    :param fs: target function space; only ``dim`` is used, to size the
        output array.
    :param coords: coordinate field; supplies the coordinate function space
        and is always the first returned coefficient.
    :returns: ``(kernel, False, coefficients)`` where ``coefficients`` is
        ``coords`` followed by one ``GlobalWrapper`` per user argument.
        NOTE(review): the meaning of the constant ``False`` is defined by
        the caller -- confirm there.
    :raises ValueError: if a user argument is named ``x``.
    """
    coords_space = coords.function_space()
    coords_element = coords_space.fiat_element

    names = {v[0] for v in expression._user_args}

    # Take the first table of the tabulation dict (presumably the only one
    # for derivative order 0 -- TODO confirm).  ``next(iter(...))`` is used
    # because dict views cannot be indexed on Python 3.
    tabulation = next(iter(coords_element.tabulate(0, to_pts).values()))

    # Produce C array notation of X.
    X_str = "{{" + "},\n{".join(",".join(map(str, row))
                               for row in tabulation.T) + "}}"

    # Kernel-local identifiers that must not clash with user argument names.
    A = utils.unique_name("A", names)
    X = utils.unique_name("X", names)
    x_ = utils.unique_name("x_", names)
    k = utils.unique_name("k", names)
    d = utils.unique_name("d", names)
    i_ = utils.unique_name("i", names)
    # x is a reserved name.
    x = "x"
    if "x" in names:
        raise ValueError("cannot use 'x' as a user-defined Expression variable")

    # One assignment per component of the expression.
    ncode = len(expression.code)
    ass_exp = [ast.Assign(ast.Symbol(A, (k,), ((ncode, i),)),
                          ast.FlatBlock("%s" % code))
               for i, code in enumerate(expression.code)]

    dim = coords_space.dim
    ndof = to_element.space_dimension()
    xndof = coords_element.space_dimension()
    nfdof = ndof * numpy.prod(fs.dim, dtype=int)

    # Declarations: tabulated coordinate basis, the point x, and pi.
    init_X = ast.Decl(typ="double", sym=ast.Symbol(X, rank=(ndof, xndof)),
                      qualifiers=["const"], init=X_str)
    init_x = ast.Decl(typ="double", sym=ast.Symbol(x, rank=(dim,)))
    init_pi = ast.Decl(typ="double", sym="pi", qualifiers=["const"],
                       init="3.141592653589793")
    init = ast.Block([init_X, init_x, init_pi])

    # x[d] = 0; then x[d] += X[k][i] * x_[i][d] for every i.
    incr_x = ast.Incr(ast.Symbol(x, rank=(d,)),
                      ast.Prod(ast.Symbol(X, rank=(k, i_)),
                               ast.Symbol(x_, rank=(i_, d))))
    assign_x = ast.Assign(ast.Symbol(x, rank=(d,)), 0)
    loop_x = ast.For(init=ast.Decl("unsigned int", i_, 0),
                     cond=ast.Less(i_, xndof),
                     incr=ast.Incr(i_, 1), body=[incr_x])
    block = ast.For(init=ast.Decl("unsigned int", d, 0),
                    cond=ast.Less(d, dim),
                    incr=ast.Incr(d, 1), body=[assign_x, loop_x])
    loop = ast.c_for(k, ndof, ast.Block([block] + ass_exp, open_scope=True))

    user_args = []
    user_init = []
    for _, arg in expression._user_args:
        if arg.shape == (1, ):
            # Shape-(1,) arguments arrive as ``name_`` pointers and are
            # dereferenced into a const local carrying the user's name.
            user_args.append(ast.Decl("double *", "%s_" % arg.name))
            user_init.append(ast.FlatBlock("const double %s = *%s_;" %
                                           (arg.name, arg.name)))
        else:
            user_args.append(ast.Decl("double *", arg.name))

    kernel_code = ast.FunDecl("void", "expression_kernel",
                              [ast.Decl("double", ast.Symbol(A, (nfdof,))),
                               ast.Decl("double**", x_)] + user_args,
                              ast.Block(user_init + [init, loop],
                                        open_scope=False))

    coefficients = [coords]
    coefficients.extend(GlobalWrapper(arg) for _, arg in expression._user_args)

    return op2.Kernel(kernel_code, kernel_code.name), False, tuple(coefficients)
def ast_matmul(self, F_a, implementation='optimized'):
    """Generate an AST for a PyOP2 kernel performing a matrix-vector
    multiplication.

    The result is memoised in ``self.asts`` under the key
    ``(ndofs, cdim, kernel_name, implementation)``.

    :param F_a: object whose ``function_space()`` provides ``fiat_element``,
        ``dim`` and ``name``.
    :param implementation: ``'optimized'`` (the default) accumulates each
        output entry in a local ``sum`` scalar, optionally annotated with a
        SIMD reduction pragma; it is only used when ``cdim >= 4``.  Any
        other value, or a smaller ``cdim``, selects the plain variant that
        accumulates directly into ``C``.
    :returns: the ``ast.FunDecl`` of a ``static inline void`` kernel taking
        ``A``, ``B`` and ``C``.
    """
    # The number of dofs on each element is /ndofs*cdim/
    F_a_fs = F_a.function_space()
    entity_dofs = F_a_fs.fiat_element.entity_dofs()
    ndofs = sum(self.mesh.make_dofs_per_plex_entity(entity_dofs))
    cdim = F_a_fs.dim
    name = 'mat_vec_mul_kernel_%s' % F_a_fs.name

    identifier = (ndofs, cdim, name, implementation)
    if identifier in self.asts:
        return self.asts[identifier]

    def c_entry():
        # C[i/cdim][index]; a fresh node per use.
        return ast.Symbol('C', ('i/%d' % cdim, 'index'))

    # Pieces shared by both variants.
    index_decl = ast.Decl('const int', ast.Symbol('index'),
                          init=ast.Symbol('i%%%d' % cdim))
    product = ast.Prod(
        ast.Symbol('A', ('i', ), ((ndofs * cdim, 'j*%d + k' % cdim), )),
        ast.Symbol('B', ('j', 'k')))

    # Craft the AST
    if implementation == 'optimized' and cdim >= 4:
        # Imported lazily: only this variant consults the ISA description.
        from coffee import isa
        if cdim % isa['dp_reg'] == 0:
            simd_pragma = '#pragma simd reduction(+:sum)'
        else:
            simd_pragma = ''
        k_loop = ast.c_for('k', cdim, ast.Incr(ast.Symbol('sum'), product),
                           simd_pragma).children[0]
        row_body = [
            index_decl,
            ast.Decl('double', ast.Symbol('sum'), init=ast.Symbol('0.0')),
            ast.c_for('j', ndofs, k_loop).children[0],
            ast.Assign(c_entry(), 'sum'),
        ]
    else:
        k_loop = ast.c_for('k', cdim,
                           ast.Incr(c_entry(), product)).children[0]
        row_body = [
            index_decl,
            # The subscript is the literal name 'index'; applying
            # ``% cdim`` to it raised TypeError because the string has no
            # conversion specifier.
            ast.Assign(c_entry(), '0.0'),
            ast.c_for('j', ndofs, k_loop).children[0],
        ]

    body = ast.Block([ast.c_for('i', ndofs * cdim, row_body).children[0]])
    funargs = [
        ast.Decl('double* restrict', 'A'),
        ast.Decl('double *restrict *restrict', 'B'),
        ast.Decl('double *restrict *', 'C'),
    ]
    fundecl = ast.FunDecl('void', name, funargs, body, ['static', 'inline'])

    # Track the AST for later fast retrieval
    self.asts[identifier] = fundecl
    return fundecl