def __init__(self, discr, optemplate, post_bind_mapper, type_hints=None):
    """Prepare and compile *optemplate* for execution on *discr*.

    Records flop estimates, assigns one bit to every boundary tag found in
    the operator template, compiles the template into ``self.code`` and
    builds the local differentiation kernel.

    :arg discr: discretization object; this method reads its
        ``face_groups``, ``mesh``, ``debug`` and ``diff_plan`` attributes.
    :arg optemplate: operator template handed to
        ``prepare_optemplate_stage1``.
    :arg post_bind_mapper: forwarded unchanged to
        ``prepare_optemplate_stage1``.
    :arg type_hints: mapping forwarded to ``prepare_optemplate_stage2`` and
        to the compiler call. ``None`` (the default) stands for an empty
        mapping; a fresh dict is created per call so that no state is
        shared between instances.
    """
    if type_hints is None:
        type_hints = {}

    self.discr = discr

    self.elwise_linear_cache = {}

    from hedge.tools import diff_rst_flops, diff_rescale_one_flops, \
            mass_flops, lift_flops
    self.diff_rst_flops = diff_rst_flops(discr)
    self.diff_rescale_one_flops = diff_rescale_one_flops(discr)
    self.mass_flops = mass_flops(discr)
    # lift flops are accumulated over all face groups of the discretization
    self.lift_flops = sum(lift_flops(fg) for fg in discr.face_groups)

    optemplate_stage1 = self.prepare_optemplate_stage1(
            optemplate, post_bind_mapper)

    # build a boundary tag bitmap
    from hedge.optemplate import BoundaryTagCollector
    # each distinct tag gets the next free number, in collection order
    self.boundary_tag_to_number = {}
    for btag in BoundaryTagCollector()(optemplate_stage1):
        self.boundary_tag_to_number.setdefault(
                btag, len(self.boundary_tag_to_number))

    # maps an element face to the OR of the bits of all tags it carries
    e2bb = self.elface_to_bdry_bitmap = {}

    # items() instead of iteritems(): identical result on this plain dict,
    # and it does not tie the loop to Python 2.
    for btag, bdry_number in self.boundary_tag_to_number.items():
        bdry_bit = 1 << bdry_number
        for elface in discr.mesh.tag_to_boundary.get(btag, []):
            e2bb[elface] = (e2bb.get(elface, 0) | bdry_bit)

    # compile the optemplate
    from struct import calcsize
    # calcsize("P") is the size of a C pointer in bytes, so the batch limit
    # scales inversely with pointer width.
    # NOTE(review): 220 is presumably a byte budget for the generated
    # kernel's argument block -- confirm.
    self.code = OperatorCompiler(
            max_vectors_in_batch_expr=220 // calcsize("P"))(
                    self.prepare_optemplate_stage2(
                        discr.mesh, optemplate_stage1,
                        discr.debug, type_hints=type_hints),
                    type_hints=type_hints)

    # build the local kernels
    self.diff_kernel = self.discr.diff_plan.make_kernel(discr)

    if "dump_op_code" in discr.debug:
        from hedge.tools import open_unique_debug_file
        # Close the dump explicitly instead of relying on garbage
        # collection to flush and release the file.
        dump_file = open_unique_debug_file("op-code", ".txt")
        try:
            dump_file.write(str(self.code))
        finally:
            dump_file.close()
def __init__(self, discr, optemplate, post_bind_mapper, type_hints=None):
    """Compile *optemplate* for *discr* and set up per-instance state.

    The constructor stores flop estimates obtained from ``hedge.tools``,
    numbers the boundary tags that occur in the stage-1 operator template,
    derives a per-element-face bitmap from that numbering, compiles the
    template into ``self.code`` and creates ``self.diff_kernel``.

    :arg discr: discretization object; its ``face_groups``, ``mesh``,
        ``debug`` and ``diff_plan`` attributes are read here.
    :arg optemplate: operator template passed to
        ``prepare_optemplate_stage1``.
    :arg post_bind_mapper: passed through to ``prepare_optemplate_stage1``.
    :arg type_hints: mapping passed to ``prepare_optemplate_stage2`` and to
        the compiler call. Defaults to ``None``, which is replaced by a new
        empty dict on every call (a ``{}`` default would be one object
        shared by all calls).
    """
    if type_hints is None:
        type_hints = {}

    self.discr = discr
    self.elwise_linear_cache = {}

    from hedge.tools import diff_rst_flops, diff_rescale_one_flops, \
            mass_flops, lift_flops
    self.diff_rst_flops = diff_rst_flops(discr)
    self.diff_rescale_one_flops = diff_rescale_one_flops(discr)
    self.mass_flops = mass_flops(discr)
    # summed over every face group of the discretization
    self.lift_flops = sum(lift_flops(fg) for fg in discr.face_groups)

    optemplate_stage1 = self.prepare_optemplate_stage1(
            optemplate, post_bind_mapper)

    # build a boundary tag bitmap
    from hedge.optemplate import BoundaryTagCollector
    self.boundary_tag_to_number = {}
    for btag in BoundaryTagCollector()(optemplate_stage1):
        # setdefault keeps the first number handed to a repeated tag
        self.boundary_tag_to_number.setdefault(
                btag, len(self.boundary_tag_to_number))

    # element face -> bitwise OR of (1 << number) over the tags it belongs to
    e2bb = self.elface_to_bdry_bitmap = {}

    # .items() behaves the same as .iteritems() for this loop and is not
    # restricted to Python 2.
    for btag, bdry_number in self.boundary_tag_to_number.items():
        bdry_bit = 1 << bdry_number
        # tags without mesh faces contribute nothing
        for elface in discr.mesh.tag_to_boundary.get(btag, []):
            e2bb[elface] = (e2bb.get(elface, 0) | bdry_bit)

    # compile the optemplate
    from struct import calcsize
    # "P" is the struct format for a C pointer; the limit is 220 divided by
    # the pointer size in bytes.
    # NOTE(review): the origin of the constant 220 is not visible here.
    self.code = OperatorCompiler(
            max_vectors_in_batch_expr=220 // calcsize("P"))(
                    self.prepare_optemplate_stage2(
                        discr.mesh, optemplate_stage1,
                        discr.debug, type_hints=type_hints),
                    type_hints=type_hints)

    # build the local kernels
    self.diff_kernel = self.discr.diff_plan.make_kernel(discr)

    if "dump_op_code" in discr.debug:
        from hedge.tools import open_unique_debug_file
        # Write the compiled code and close the file deterministically,
        # even if str()/write() raises.
        code_dump = open_unique_debug_file("op-code", ".txt")
        try:
            code_dump.write(str(self.code))
        finally:
            code_dump.close()
def exec_flux_batch_assign(self, insn):
    """Execute a flux batch instruction and return its named results.

    The flux arguments are evaluated with ``self.rec``, brought to a common
    dtype, and handed to the gather routine of the module obtained from
    ``insn.get_module``. For every face group the gathered face fluxes are
    then turned into volume vectors via ``self.executor.lift_flux``.

    :arg insn: the flux batch instruction; this method reads its
        ``flux_var_info``, ``is_boundary``, ``repr_op``, ``quadrature_tag``,
        ``names`` and ``expressions`` attributes and calls ``get_module``.
    :returns: a tuple ``(assignments, [])`` where *assignments* is a list of
        ``(name, vector)`` pairs; the second element is always an empty
        list.
    """
    from pymbolic.primitives import is_zero
    from pytools import common_dtype

    # Placeholders for arguments that evaluated to zero. They are replaced
    # by real zero arrays once the common dtype is known.
    class ZeroSpec:
        pass

    class BoundaryZeros(ZeroSpec):
        pass

    class VolumeZeros(ZeroSpec):
        pass

    def eval_arg(arg_spec):
        expr, is_int = arg_spec
        value = self.rec(expr)
        if not is_zero(value):
            return value
        # NOTE(review): is_int presumably means "interior" -- confirm.
        if insn.is_boundary and not is_int:
            return BoundaryZeros()
        return VolumeZeros()

    evaluated = [eval_arg(spec) for spec in insn.flux_var_info.arg_specs]

    # zero placeholders carry no dtype and do not take part in the promotion
    max_dtype = common_dtype(
            [val.dtype for val in evaluated if not isinstance(val, ZeroSpec)],
            self.discr.default_scalar_type)

    def cast_arg(value):
        if isinstance(value, BoundaryZeros):
            return self.discr.boundary_zeros(
                    insn.repr_op.boundary_tag, dtype=max_dtype)
        if isinstance(value, VolumeZeros):
            return self.discr.volume_zeros(dtype=max_dtype)
        if isinstance(value, np.ndarray):
            return np.asarray(value, dtype=max_dtype)
        return value

    flux_args = [cast_arg(val) for val in evaluated]

    # Pick the object whose face groups are iterated: the discretization or
    # one of its boundaries, optionally narrowed to a quadrature setup.
    face_group_owner = self.discr
    if insn.is_boundary:
        face_group_owner = self.discr.get_boundary(insn.repr_op.boundary_tag)
    if insn.quadrature_tag is not None:
        face_group_owner = face_group_owner.get_quadrature_info(
                insn.quadrature_tag)
    face_groups = face_group_owner.face_groups

    result = []

    for fg in face_groups:
        # grab module
        module = insn.get_module(self.discr, max_dtype)
        gather = module.gather_flux

        # set up argument structure
        arg_struct = module.ArgStruct()
        for arg_name, value in zip(insn.flux_var_info.arg_names, flux_args):
            setattr(arg_struct, arg_name, value)
        for scalar_idx, scalar_expr in enumerate(
                insn.flux_var_info.scalar_parameters):
            setattr(arg_struct, "_scalar_arg_%d" % scalar_idx,
                    self.rec(scalar_expr))

        # one flat output buffer per flux expression
        face_dof_count = \
                fg.face_count * fg.face_length() * fg.element_count()
        all_fluxes_on_faces = []
        for flux_idx, _ in enumerate(insn.expressions):
            face_buf = np.zeros((face_dof_count,), dtype=max_dtype)
            all_fluxes_on_faces.append(face_buf)
        for flux_idx, face_buf in enumerate(all_fluxes_on_faces):
            setattr(arg_struct, "flux%d_on_faces" % flux_idx, face_buf)

        # make sure everything ended up in Boost.Python attributes
        # (i.e. empty __dict__)
        assert not arg_struct.__dict__, arg_struct.__dict__.keys()

        # perform gather
        gather(fg, arg_struct)

        # do lift, produce output
        for name, flux_bdg, fluxes_on_faces in zip(
                insn.names, insn.expressions, all_fluxes_on_faces):
            if insn.quadrature_tag is not None:
                # lifting is not supported together with quadrature here
                assert not flux_bdg.op.is_lift
                mat = fg.ldis_loc_quad_info.multi_face_mass_matrix()
                scaling = None
            elif flux_bdg.op.is_lift:
                mat = fg.ldis_loc.lifting_matrix()
                scaling = fg.local_el_inverse_jacobians
            else:
                mat = fg.ldis_loc.multi_face_mass_matrix()
                scaling = None

            out = self.discr.volume_zeros(dtype=fluxes_on_faces.dtype)
            self.executor.lift_flux(fg, mat, scaling, fluxes_on_faces, out)

            if self.discr.instrumented:
                from hedge.tools import lift_flops

                # correct for quadrature, too.
                # NOTE(review): unclear whether the line above is a claim
                # or an open TODO -- confirm.
                self.discr.lift_flop_counter.add(lift_flops(fg))

            result.append((name, out))

    if not face_groups:
        # No face groups? Still assign context variables.
        result.extend(
                (name, self.discr.volume_zeros())
                for name, _ in zip(insn.names, insn.expressions))

    return result, []
def exec_flux_batch_assign(self, insn):
    """Run the gather and lift steps for every flux in *insn*.

    Steps, in order:

    1. evaluate each entry of ``insn.flux_var_info.arg_specs`` through
       ``self.rec``; results that test as zero are remembered as
       placeholders,
    2. determine a common dtype from the non-zero arguments (falling back
       on ``self.discr.default_scalar_type``) and cast the arguments to it,
    3. for each face group, fill an ``ArgStruct`` of the module returned by
       ``insn.get_module``, call its ``gather_flux`` and pass each face
       flux buffer to ``self.executor.lift_flux``.

    :arg insn: flux batch instruction object.
    :returns: ``(pairs, [])`` with *pairs* a list of ``(name, vector)``
        tuples. When there are no face groups, every name is paired with
        ``self.discr.volume_zeros()``.
    """
    from pymbolic.primitives import is_zero

    # marker hierarchy for "this argument is zero, array not built yet"
    class ZeroSpec:
        pass

    class BoundaryZeros(ZeroSpec):
        pass

    class VolumeZeros(ZeroSpec):
        pass

    def eval_arg(arg_spec):
        arg_expr, is_int = arg_spec
        evaluated = self.rec(arg_expr)
        if is_zero(evaluated):
            wants_boundary = insn.is_boundary and not is_int
            return BoundaryZeros() if wants_boundary else VolumeZeros()
        return evaluated

    raw_args = []
    for one_spec in insn.flux_var_info.arg_specs:
        raw_args.append(eval_arg(one_spec))

    from pytools import common_dtype
    known_dtypes = [
            raw.dtype for raw in raw_args if not isinstance(raw, ZeroSpec)]
    max_dtype = common_dtype(known_dtypes, self.discr.default_scalar_type)

    def cast_arg(arg):
        # placeholders become zero arrays of the common dtype; numpy arrays
        # are converted to it; anything else passes through untouched
        if isinstance(arg, BoundaryZeros):
            return self.discr.boundary_zeros(
                    insn.repr_op.boundary_tag, dtype=max_dtype)
        elif isinstance(arg, VolumeZeros):
            return self.discr.volume_zeros(dtype=max_dtype)
        elif isinstance(arg, np.ndarray):
            return np.asarray(arg, dtype=max_dtype)
        return arg

    args = [cast_arg(raw) for raw in raw_args]

    use_quadrature = insn.quadrature_tag is not None
    if insn.is_boundary:
        bdry = self.discr.get_boundary(insn.repr_op.boundary_tag)
        if use_quadrature:
            face_groups = bdry.get_quadrature_info(
                    insn.quadrature_tag).face_groups
        else:
            face_groups = bdry.face_groups
    elif use_quadrature:
        face_groups = self.discr.get_quadrature_info(
                insn.quadrature_tag).face_groups
    else:
        face_groups = self.discr.face_groups

    def select_lift_operator(fg, flux_bdg):
        # returns (matrix, scaling) for turning face fluxes into volume data
        if insn.quadrature_tag is None:
            if flux_bdg.op.is_lift:
                return (fg.ldis_loc.lifting_matrix(),
                        fg.local_el_inverse_jacobians)
            return fg.ldis_loc.multi_face_mass_matrix(), None

        assert not flux_bdg.op.is_lift
        return fg.ldis_loc_quad_info.multi_face_mass_matrix(), None

    result = []

    for fg in face_groups:
        # grab module
        module = insn.get_module(self.discr, max_dtype)
        func = module.gather_flux

        # set up argument structure
        arg_struct = module.ArgStruct()
        for arg_name, arg in zip(insn.flux_var_info.arg_names, args):
            setattr(arg_struct, arg_name, arg)

        scalar_exprs = insn.flux_var_info.scalar_parameters
        for arg_num, scalar_arg_expr in enumerate(scalar_exprs):
            scalar_value = self.rec(scalar_arg_expr)
            setattr(arg_struct, "_scalar_arg_%d" % arg_num, scalar_value)

        fof_shape = (fg.face_count*fg.face_length()*fg.element_count(),)
        all_fluxes_on_faces = [
                np.zeros(fof_shape, dtype=max_dtype)
                for _ in insn.expressions]
        for i, fof in enumerate(all_fluxes_on_faces):
            setattr(arg_struct, "flux%d_on_faces" % i, fof)

        # make sure everything ended up in Boost.Python attributes
        # (i.e. empty __dict__)
        assert not arg_struct.__dict__, arg_struct.__dict__.keys()

        # perform gather
        func(fg, arg_struct)

        # do lift, produce output
        lift_inputs = zip(
                insn.names, insn.expressions, all_fluxes_on_faces)
        for name, flux_bdg, fluxes_on_faces in lift_inputs:
            mat, scaling = select_lift_operator(fg, flux_bdg)

            out = self.discr.volume_zeros(dtype=fluxes_on_faces.dtype)
            self.executor.lift_flux(fg, mat, scaling, fluxes_on_faces, out)

            if self.discr.instrumented:
                from hedge.tools import lift_flops

                # correct for quadrature, too.
                self.discr.lift_flop_counter.add(lift_flops(fg))

            result.append((name, out))

    if not face_groups:
        # No face groups? Still assign context variables.
        for name, _ in zip(insn.names, insn.expressions):
            result.append((name, self.discr.volume_zeros()))

    return result, []