def map_elementwise_linear(self, op, field_expr):
    field = self.rec(field_expr)

    from grudge.tools import is_zero
    if is_zero(field):
        return 0

    in_discr = self.discrwb.discr_from_dd(op.dd_in)
    out_discr = self.discrwb.discr_from_dd(op.dd_out)

    result = out_discr.empty(self.array_context, dtype=field.entry_dtype)

    prg = self._elwise_linear_loopy_prg()
    for in_grp, out_grp in zip(in_discr.groups, out_discr.groups):
        cache_key = "elwise_linear", in_grp, out_grp, op, field.entry_dtype
        try:
            matrix = self.bound_op.operator_data_cache[cache_key]
        except KeyError:
            matrix = self.array_context.freeze(
                    self.array_context.from_numpy(
                        np.asarray(
                            op.matrix(out_grp, in_grp),
                            dtype=field.entry_dtype)))

            self.bound_op.operator_data_cache[cache_key] = matrix

        self.array_context.call_loopy(
                prg,
                mat=matrix,
                result=result[out_grp.index],
                vec=field[in_grp.index])

    return result
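
# For reference: the loopy program built by _elwise_linear_loopy_prg()
# applies one small matrix to every element's DOF vector. A minimal NumPy
# sketch of the same per-group contraction follows; the shapes and names
# are illustrative assumptions, not grudge API.
def _demo_elwise_linear_numpy():
    import numpy as np

    nelements, n_in, n_out = 100, 5, 4       # made-up group sizes
    mat = np.random.rand(n_out, n_in)        # per-group operator matrix
    vec = np.random.rand(nelements, n_in)    # per-element input DOFs

    # result[iel, idof] = sum(j, mat[idof, j] * vec[iel, j])
    result = vec @ mat.T

    assert result.shape == (nelements, n_out)
    return result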
def map_operator_binding(self, expr):
    field = self.rec(expr.field)

    from grudge.tools import is_zero
    if is_zero(field):
        return 0

    return expr.op(field)
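
# The mappers above short-circuit on grudge.tools.is_zero. A sketch of the
# behavior these call sites assume (not the actual grudge source): only
# honest scalar zeros count, since an elementwise "x == 0" over numpy
# arrays or symbolic expressions would not produce a usable boolean.
def _is_zero_sketch(x):
    import numpy as np

    if isinstance(x, (int, float, complex, np.number)):
        return x == 0
    # symbolic expressions, DOF arrays, etc. are never treated as zero
    return False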
def map_constant(self, expr):
    from grudge.tools import is_zero
    if is_zero(expr):
        return 0
    else:
        return op.OperatorBinding(
                op.InverseMassOperator(),
                self.outer_mass_contractor(expr))
def map_ref_face_mass_operator(self, op, field_expr):
    field = self.rec(field_expr)

    from grudge.tools import is_zero
    if is_zero(field):
        return 0

    @memoize_in(self.bound_op, "face_mass_knl")
    def knl():
        knl = lp.make_kernel(
            """{[k,i,f,j]:
                0<=k<nelements and
                0<=f<nfaces and
                0<=i<nvol_nodes and
                0<=j<nface_nodes}""",
            "result[k,i] = sum(f, sum(j, mat[i, f, j] * vec[f, k, j]))",
            default_offset=lp.auto, name="face_mass")

        knl = lp.split_iname(knl, "i", 16, inner_tag="l.0")
        return lp.tag_inames(knl, dict(k="g.0"))

    all_faces_conn = self.discrwb.connection_from_dds("vol", op.dd_in)
    all_faces_discr = all_faces_conn.to_discr
    vol_discr = all_faces_conn.from_discr

    result = vol_discr.empty(
            queue=self.queue,
            dtype=field.dtype,
            allocator=self.bound_op.allocator)

    assert len(all_faces_discr.groups) == len(vol_discr.groups)

    for afgrp, volgrp in zip(all_faces_discr.groups, vol_discr.groups):
        cache_key = "face_mass", afgrp, op, field.dtype
        nfaces = volgrp.mesh_el_group.nfaces

        try:
            matrix = self.bound_op.operator_data_cache[cache_key]
        except KeyError:
            matrix = op.matrix(afgrp, volgrp, field.dtype)
            matrix = (
                    cl.array.to_device(self.queue, matrix)
                    .with_queue(None))

            self.bound_op.operator_data_cache[cache_key] = matrix

        input_view = afgrp.view(field).reshape(
                nfaces, volgrp.nelements, afgrp.nunit_nodes)
        knl()(self.queue,
                mat=matrix, result=volgrp.view(result), vec=input_view)

    return result
def map_ref_face_mass_operator(self, op, field_expr):
    field = self.rec(field_expr)

    from grudge.tools import is_zero
    if is_zero(field):
        return 0

    @memoize_in(self.array_context, (ExecutionMapper, "face_mass_knl"))
    def prg():
        return make_loopy_program(
            """{[iel,idof,f,j]:
                0<=iel<nelements and
                0<=f<nfaces and
                0<=idof<nvol_nodes and
                0<=j<nface_nodes}""",
            """
            result[iel,idof] = sum(f, sum(j, mat[idof, f, j]
                    * vec[f, iel, j]))
            """,
            name="face_mass")

    all_faces_conn = self.discrwb.connection_from_dds("vol", op.dd_in)
    all_faces_discr = all_faces_conn.to_discr
    vol_discr = all_faces_conn.from_discr

    result = vol_discr.empty(self.array_context, dtype=field.entry_dtype)

    assert len(all_faces_discr.groups) == len(vol_discr.groups)

    for afgrp, volgrp in zip(all_faces_discr.groups, vol_discr.groups):
        # NOTE: a DOFArray carries entry_dtype, not dtype
        cache_key = "face_mass", afgrp, op, field.entry_dtype
        nfaces = volgrp.mesh_el_group.nfaces

        try:
            matrix = self.bound_op.operator_data_cache[cache_key]
        except KeyError:
            matrix = op.matrix(afgrp, volgrp, field.entry_dtype)
            matrix = self.array_context.freeze(
                    self.array_context.from_numpy(matrix))
            self.bound_op.operator_data_cache[cache_key] = matrix

        input_view = field[afgrp.index].reshape(
                nfaces, volgrp.nelements, afgrp.nunit_dofs)
        self.array_context.call_loopy(
                prg(),
                mat=matrix, result=result[volgrp.index], vec=input_view)

    return result
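
# Both face-mass variants above compute the same contraction per element
# group: result[iel, idof] = sum over f and j of
# mat[idof, f, j] * vec[f, iel, j]. A self-contained NumPy/einsum sketch,
# with made-up shapes:
def _demo_face_mass_numpy():
    import numpy as np

    nfaces, nelements, nvol_nodes, nface_nodes = 4, 200, 10, 6
    mat = np.random.rand(nvol_nodes, nfaces, nface_nodes)
    vec = np.random.rand(nfaces, nelements, nface_nodes)

    # indices: mat[i, f, j], vec[f, k, j] -> result[k, i]
    result = np.einsum("ifj,fkj->ki", mat, vec)

    assert result.shape == (nelements, nvol_nodes)
    return result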
def incident_bc(self, w):
    """Flux terms for incident boundary conditions"""
    # NOTE: Untested for inhomogeneous materials, but would usually be
    # physically meaningless anyway (are there exceptions to this?)
    e, h = self.split_eh(w)

    from grudge.tools import count_subset
    fld_cnt = count_subset(self.get_eh_subset())

    from grudge.tools import is_zero
    incident_bc_data = self.incident_bc_data(self, e, h)
    if is_zero(incident_bc_data):
        return make_obj_array([0]*fld_cnt)
    else:
        return sym.cse(-incident_bc_data)
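
# count_subset above tallies how many field components are enabled in the
# E/H subset tuple, which fixes the length of the zero result vector. A
# sketch of the assumed behavior (not the grudge source):
def _count_subset_sketch(subset):
    # subset is a tuple of booleans, one per potential field component
    return sum(1 for enabled in subset if enabled)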
def map_elementwise_linear(self, op, field_expr):
    field = self.rec(field_expr)

    from grudge.tools import is_zero
    if is_zero(field):
        return 0

    @memoize_in(self.array_context, (ExecutionMapper, "elwise_linear_knl"))
    def prg():
        result = make_loopy_program(
            """{[iel, idof, j]:
                0<=iel<nelements and
                0<=idof<ndiscr_nodes_out and
                0<=j<ndiscr_nodes_in}""",
            "result[iel, idof] = sum(j, mat[idof, j] * vec[iel, j])",
            name="diff")

        result = lp.tag_array_axes(result, "mat", "stride:auto,stride:auto")
        return result

    in_discr = self.discrwb.discr_from_dd(op.dd_in)
    out_discr = self.discrwb.discr_from_dd(op.dd_out)

    result = out_discr.empty(self.array_context, dtype=field.entry_dtype)

    for in_grp, out_grp in zip(in_discr.groups, out_discr.groups):
        # NOTE: a DOFArray carries entry_dtype, not dtype
        cache_key = "elwise_linear", in_grp, out_grp, op, field.entry_dtype
        try:
            matrix = self.bound_op.operator_data_cache[cache_key]
        except KeyError:
            matrix = self.array_context.freeze(
                    self.array_context.from_numpy(
                        np.asarray(
                            op.matrix(out_grp, in_grp),
                            dtype=field.entry_dtype)))

            self.bound_op.operator_data_cache[cache_key] = matrix

        self.array_context.call_loopy(
                prg(),
                mat=matrix,
                result=result[out_grp.index],
                vec=field[in_grp.index])

    return result
def map_elementwise_linear(self, op, field_expr):
    field = self.rec(field_expr)

    from grudge.tools import is_zero
    if is_zero(field):
        return 0

    @memoize_in(self.bound_op, "elwise_linear_knl")
    def knl():
        knl = lp.make_kernel(
            """{[k,i,j]:
                0<=k<nelements and
                0<=i<ndiscr_nodes_out and
                0<=j<ndiscr_nodes_in}""",
            "result[k,i] = sum(j, mat[i, j] * vec[k, j])",
            default_offset=lp.auto, name="diff")

        knl = lp.split_iname(knl, "i", 16, inner_tag="l.0")
        return lp.tag_inames(knl, dict(k="g.0"))

    in_discr = self.discrwb.discr_from_dd(op.dd_in)
    out_discr = self.discrwb.discr_from_dd(op.dd_out)

    result = out_discr.empty(
            queue=self.queue,
            dtype=field.dtype,
            allocator=self.bound_op.allocator)

    for in_grp, out_grp in zip(in_discr.groups, out_discr.groups):
        cache_key = "elwise_linear", in_grp, out_grp, op, field.dtype
        try:
            matrix = self.bound_op.operator_data_cache[cache_key]
        except KeyError:
            matrix = (
                    cl.array.to_device(
                        self.queue,
                        np.asarray(op.matrix(out_grp, in_grp),
                            dtype=field.dtype))
                    .with_queue(None))

            self.bound_op.operator_data_cache[cache_key] = matrix

        knl()(self.queue, mat=matrix, result=out_grp.view(result),
                vec=in_grp.view(field))

    return result
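
# A minimal, self-contained sketch of running a kernel shaped like
# elwise_linear_knl through loopy's convenience invocation. This assumes a
# working PyOpenCL setup; sizes and names are made up, and the
# transformation steps (split_iname/tag_inames) are omitted since loopy
# can also execute the untransformed kernel.
def _demo_elwise_linear_loopy():
    import numpy as np
    import pyopencl as cl
    import loopy as lp

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
            "{[k,i,j]: 0<=k<nelements and 0<=i<n_out and 0<=j<n_in}",
            "result[k,i] = sum(j, mat[i, j] * vec[k, j])",
            name="demo_elwise_linear")

    mat = np.random.rand(4, 5)
    vec = np.random.rand(100, 5)

    # loopy infers shapes and dtypes from the numpy arguments and
    # handles host/device transfer for them.
    evt, (result,) = knl(queue, mat=mat, vec=vec)
    assert np.allclose(result, vec @ mat.T)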
def bind_one(subexpr):
    if is_zero(subexpr):
        return subexpr
    else:
        from grudge.symbolic.primitives import OperatorBinding
        return OperatorBinding(self, subexpr)
def aggregate_assignments(inf_mapper, instructions, result,
        max_vectors_in_batch_expr):
    from pymbolic.primitives import Variable

    function_registry = inf_mapper.function_registry

    # {{{ aggregation helpers

    def get_complete_origins_set(insn, skip_levels=0):
        try:
            return insn_to_origins_cache[insn]
        except KeyError:
            pass

        if skip_levels < 0:
            skip_levels = 0

        result = set()
        for dep in insn.get_dependencies():
            if isinstance(dep, Variable):
                dep_origin = origins_map.get(dep.name, None)
                if dep_origin is not None:
                    if skip_levels <= 0:
                        result.add(dep_origin)
                    result |= get_complete_origins_set(
                            dep_origin, skip_levels-1)

        insn_to_origins_cache[insn] = result

        return result

    var_assignees_cache = {}

    def get_var_assignees(insn):
        try:
            return var_assignees_cache[insn]
        except KeyError:
            result = {Variable(assignee)
                    for assignee in insn.get_assignees()}
            var_assignees_cache[insn] = result
            return result

    def aggregate_two_assignments(ass_1, ass_2):
        names = ass_1.names + ass_2.names

        from pymbolic.primitives import Variable
        deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                - {Variable(name) for name in names}

        return Assign(
                names=names, exprs=ass_1.exprs + ass_2.exprs,
                _dependencies=deps,
                priority=max(ass_1.priority, ass_2.priority))

    # }}}

    # {{{ main aggregation pass

    insn_to_origins_cache = {}

    origins_map = {
            assignee: insn
            for insn in instructions
            for assignee in insn.get_assignees()}

    from pytools import partition
    from grudge.symbolic.primitives import DTAG_SCALAR

    unprocessed_assigns, other_insns = partition(
            lambda insn: (
                isinstance(insn, Assign)
                and not isinstance(insn, ToDiscretizationScopedAssign)
                and not isinstance(insn, FromDiscretizationScopedAssign)
                and not is_external_call(insn.exprs[0], function_registry)
                and not any(
                    inf_mapper.infer_for_name(n).domain_tag == DTAG_SCALAR
                    for n in insn.names)),
            instructions)

    # filter out zero-flop-count assigns--no need to bother with those
    processed_assigns, unprocessed_assigns = partition(
            lambda ass: ass.flop_count() == 0,
            unprocessed_assigns)

    # filter out zero assignments
    from grudge.tools import is_zero

    i = 0
    while i < len(unprocessed_assigns):
        my_assign = unprocessed_assigns[i]
        if any(is_zero(expr) for expr in my_assign.exprs):
            processed_assigns.append(unprocessed_assigns.pop(i))
        else:
            i += 1

    # greedy aggregation
    while unprocessed_assigns:
        my_assign = unprocessed_assigns.pop()

        my_deps = my_assign.get_dependencies()
        my_assignees = get_var_assignees(my_assign)

        agg_candidates = []

        for i, other_assign in enumerate(unprocessed_assigns):
            other_deps = other_assign.get_dependencies()
            other_assignees = get_var_assignees(other_assign)

            if ((my_deps & other_deps
                    or my_deps & other_assignees
                    or other_deps & my_assignees)
                    and my_assign.priority == other_assign.priority):
                agg_candidates.append((i, other_assign))

        did_work = False

        if agg_candidates:
            my_indirect_origins = get_complete_origins_set(
                    my_assign, skip_levels=1)

            for other_assign_index, other_assign in agg_candidates:
                if max_vectors_in_batch_expr is not None:
                    new_assignee_count = len(
                            set(my_assign.get_assignees())
                            | set(other_assign.get_assignees()))
                    new_dep_count = len(
                            my_assign.get_dependencies(
                                each_vector=True)
                            | other_assign.get_dependencies(
                                each_vector=True))

                    if (new_assignee_count + new_dep_count
                            > max_vectors_in_batch_expr):
                        continue

                other_indirect_origins = get_complete_origins_set(
                        other_assign, skip_levels=1)

                if (my_assign not in other_indirect_origins
                        and other_assign not in my_indirect_origins):
                    did_work = True

                    # aggregate the two assignments
                    new_assignment = aggregate_two_assignments(
                            my_assign, other_assign)
                    del unprocessed_assigns[other_assign_index]
                    unprocessed_assigns.append(new_assignment)
                    for assignee in new_assignment.get_assignees():
                        origins_map[assignee] = new_assignment

                    break

        if not did_work:
            processed_assigns.append(my_assign)

    externally_used_names = {
            expr
            for insn in processed_assigns + other_insns
            for expr in insn.get_dependencies()}

    if isinstance(result, np.ndarray) and result.dtype.char == "O":
        externally_used_names |= {expr for expr in result}
    else:
        externally_used_names |= {result}

    def schedule_and_finalize_assignment(ass):
        dep_mapper = _make_dep_mapper(include_subscripts=False)

        names_exprs = list(zip(ass.names, ass.exprs))

        my_assignees = {name for name, expr in names_exprs}
        names_exprs_deps = [
                (name, expr,
                    {dep.name for dep in dep_mapper(expr)
                        if isinstance(dep, Variable)} & my_assignees)
                for name, expr in names_exprs]

        ordered_names_exprs = []
        available_names = set()

        while names_exprs_deps:
            schedulable = []

            i = 0
            while i < len(names_exprs_deps):
                name, expr, deps = names_exprs_deps[i]

                unsatisfied_deps = deps - available_names

                if not unsatisfied_deps:
                    schedulable.append((str(expr), name, expr))
                    del names_exprs_deps[i]
                else:
                    i += 1

            # make sure these come out in a constant order
            schedulable.sort()

            if schedulable:
                for key, name, expr in schedulable:
                    ordered_names_exprs.append((name, expr))
                    available_names.add(name)
            else:
                raise RuntimeError("aggregation resulted in an "
                        "impossible assignment")

        return Assign(
                names=[name for name, expr in ordered_names_exprs],
                exprs=[expr for name, expr in ordered_names_exprs],
                do_not_return=[Variable(name) not in externally_used_names
                    for name, expr in ordered_names_exprs],
                priority=ass.priority)

    return [schedule_and_finalize_assignment(ass)
            for ass in processed_assigns] + other_insns
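
# The scheduling loop in schedule_and_finalize_assignment above is a
# Kahn-style topological sort over the names an aggregated Assign binds,
# with a sort() for deterministic output. A self-contained sketch of the
# same idea on made-up data:
def _demo_schedule_names():
    def schedule(names_exprs_deps):
        ordered = []
        available = set()
        pending = list(names_exprs_deps)
        while pending:
            # everything whose in-batch dependencies are met is schedulable
            schedulable = sorted(
                    (name, expr) for name, expr, deps in pending
                    if deps <= available)
            if not schedulable:
                raise RuntimeError("impossible assignment (cycle)")
            for name, expr in schedulable:
                ordered.append((name, expr))
                available.add(name)
            pending = [t for t in pending if t[0] not in available]
        return ordered

    # c depends on b, b depends on a:
    assert schedule([
        ("c", "b+1", {"b"}), ("a", "5", set()), ("b", "a*2", {"a"}),
        ]) == [("a", "5"), ("b", "a*2"), ("c", "b+1")]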
def sym_operator(self, sensor_scaling=None, viscosity_only=False):
    u = self.cse_u
    rho = self.cse_rho
    rho_u = self.rho_u
    p = self.p
    e = self.e

    # {{{ artificial diffusion

    def make_artificial_diffusion():
        if self.artificial_viscosity_mode not in ["diffusion"]:
            return 0

        dq = self.grad_of_state()

        return make_obj_array([
            self.div(
                to_vol_quad(self.sensor())*to_vol_quad(dq[i]),
                to_int_face_quad(self.sensor())*to_int_face_quad(dq[i]))
            for i in range(dq.shape[0])])

    # }}}

    # {{{ state setup

    volq_flux = self.flux(self.volq_state())
    faceq_flux = self.flux(self.faceq_state())

    from grudge.symbolic.primitives import FunctionSymbol
    sqrt = FunctionSymbol("sqrt")

    speed = self.characteristic_velocity_optemplate(self.state())

    from grudge.tools import is_zero
    has_viscosity = not is_zero(self.get_mu(self.state(), to_quad_op=None))

    # }}}

    # {{{ operator assembly -----------------------------------------------

    from grudge.flux.tools import make_lax_friedrichs_flux
    from grudge.symbolic.operators import InverseMassOperator
    from grudge.symbolic.tools import make_stiffness_t

    primitive_bcs_as_quad_conservative = {
            tag: self.primitive_to_conservative(to_bdry_quad(bc))
            for tag, bc in self.get_primitive_boundary_conditions().items()}

    def get_bc_tuple(tag):
        state = self.state()
        bc = make_obj_array([
            self.get_boundary_condition_for(tag, s_i) for s_i in state])
        return tag, bc, self.flux(bc)

    first_order_part = InverseMassOperator()(
            numpy.dot(make_stiffness_t(self.dimensions), volq_flux)
            - make_lax_friedrichs_flux(
                wave_speed=cse(to_int_face_quad(speed), "emax_c"),
                state=self.faceq_state(), fluxes=faceq_flux,
                bdry_tags_states_and_fluxes=[
                    get_bc_tuple(tag) for tag in self.get_boundary_tags()],
                strong=False))

    if viscosity_only:
        first_order_part = 0*first_order_part

    result = join_fields(
            first_order_part
            + self.make_second_order_part()
            + make_artificial_diffusion()
            + self.make_extra_terms(),
            speed)

    if self.source is not None:
        result = result + join_fields(
                make_sym_vector("source_vect", len(self.state())),
                # extra field for speed
                0)

    return result
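
# For orientation: make_lax_friedrichs_flux above builds the standard
# Lax-Friedrichs (Rusanov) numerical flux. In scalar form, up to the
# normal/sign conventions grudge uses, the formula is
#
#     F*(u_int, u_ext) = (F(u_int) + F(u_ext))/2 - c/2 * (u_ext - u_int)
#
# with c the local wave speed (the "emax_c" quantity cse'd above).
# A sketch of the formula, not grudge code:
def _lax_friedrichs_flux_sketch(f_int, f_ext, u_int, u_ext, c):
    # f_*: physical flux traces, u_*: state traces at the face
    return 0.5*(f_int + f_ext) - 0.5*c*(u_ext - u_int)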