def fake_flux_face_data_block(self, block_count):
    """Build randomized stand-in flux face data covering *block_count* blocks.

    One header and one list of face-pair structs are generated per
    multiprocessor reported by ``self.discr.device``.  That set is then
    repeated and truncated to exactly *block_count* entries and handed to
    ``make_superblocks`` under the name ``"flux_data"``.

    NOTE(review): the unconditional ``assert False`` below stops the method
    before any data is generated (unless Python runs with ``-O``, which
    strips asserts).  The guard is kept unchanged; the code after it is
    presumably stale with respect to quadrature support -- confirm before
    re-enabling.

    :param block_count: number of blocks the returned data has to cover.
    :returns: the value returned by ``make_superblocks`` for the generated
        headers and face-pair blocks.
    :raises AssertionError: always, while the FIXME guard is in place.
    """
    from random import choice, gauss, randrange

    discr = self.discr
    given = self.plan.given

    fh_struct = flux_header_struct(given.float_type, discr.dimensions)
    fp_struct = face_pair_struct(given.float_type, discr.dimensions)

    min_headers = []
    min_fp_blocks = []

    # NOTE(review): flux_face_data_block reads ``plan.dofs_per_face`` as a
    # plain attribute, whereas it is called here -- confirm which is current.
    face_dofs = self.plan.dofs_per_face()

    mp_count = discr.device.get_attribute(
        cuda.device_attribute.MULTIPROCESSOR_COUNT)

    # FIXME
    assert False, "flux planning in the presence of quadrature needs to be fixed"

    pdata = self.plan.partition_data

    # Every (microblock, element-in-microblock, face) slot of one block.
    # Identical for all blocks, so it is built once.
    faces = [(mb_nr, mb_el_nr, face_nr)
             for mb_nr in range(self.plan.microblocks_per_block())
             for mb_el_nr in range(given.microblock.elements)
             for face_nr in range(self.plan.faces_per_el())]

    def bound_int(low, x, hi):
        # Clamp x into [low, hi] and truncate to int.
        return int(min(max(low, x), hi))

    def draw_base(block_nr):
        # Offset of a randomly chosen element inside block *block_nr*.
        mb_nr, mb_el_nr, _ = choice(faces)
        return (block_nr * given.microblock.aligned_floats
                * self.plan.microblocks_per_block()
                + mb_nr * given.microblock.aligned_floats
                + mb_el_nr * given.dofs_per_el())

    def draw_dest():
        # Block-local offset of a randomly chosen element face.
        mb_nr, mb_el_nr, face_nr = choice(faces)
        return (mb_nr * given.aligned_face_dofs_per_microblock()
                + mb_el_nr * face_dofs * given.faces_per_el()
                + face_nr * face_dofs)

    for block_nr in range(mp_count):
        # Face-pair count: Gaussian around the average, clamped to
        # [0, max_face_pair_count].
        fp_count = bound_int(
            0,
            gauss(pdata.face_pair_avg,
                  (pdata.max_face_pair_count - pdata.face_pair_avg) / 2),
            pdata.max_face_pair_count)

        fp_structs = []
        for _ in range(fp_count):
            fp_structs.append(
                fp_struct.make(
                    h=0.5,
                    order=2,
                    face_jacobian=0.5,
                    normal=discr.dimensions * [0.1],
                    a_base=draw_base(block_nr),
                    b_base=draw_base(block_nr),
                    a_ilist_index=randrange(self.FAKE_INDEX_LIST_COUNT)
                    * face_dofs,
                    b_ilist_index=randrange(self.FAKE_INDEX_LIST_COUNT)
                    * face_dofs,
                    boundary_bitmap=1,
                    b_write_ilist_index=randrange(self.FAKE_INDEX_LIST_COUNT)
                    * face_dofs,
                    a_dest=draw_dest(),
                    b_dest=draw_dest()))

        total_ext_face_count = bound_int(
            0, pdata.ext_face_avg + randrange(-1, 2), fp_count)
        bdry_count = min(
            total_ext_face_count,
            randrange(1 + int(round(total_ext_face_count / 6))))
        diff_count = total_ext_face_count - bdry_count

        # The *_end fields are cumulative end offsets into the face-pair list
        # (same | diff | bdry), matching the headers that
        # flux_face_data_block builds.  bdry_end equals len(fp_structs).
        same_end = len(fp_structs) - total_ext_face_count
        diff_end = same_end + diff_count
        bdry_end = diff_end + bdry_count

        min_headers.append(fh_struct.make(
            same_facepairs_end=same_end,
            diff_facepairs_end=diff_end,
            bdry_facepairs_end=bdry_end))
        min_fp_blocks.append(fp_structs)

    # Repeat the per-multiprocessor data often enough, then cut to size.
    dups = block_count // mp_count + 1
    headers = (min_headers * dups)[:block_count]
    fp_blocks = (min_fp_blocks * dups)[:block_count]

    from cgen import Value
    from hedge.backends.cuda.tools import make_superblocks

    return make_superblocks(
        given.devdata, "flux_data",
        [(headers, Value(fh_struct.tpname, "header"))],
        [(fp_blocks, Value(fp_struct.tpname, "facepairs"))])
def fake_flux_face_data_block(self, block_count):
    """Generate random placeholder flux face data for *block_count* blocks.

    For each multiprocessor of ``self.discr.device`` a header and a list of
    face-pair structs with random offsets are created.  The resulting lists
    are tiled and truncated to *block_count* entries and passed to
    ``make_superblocks`` as ``"flux_data"``.

    NOTE(review): the ``assert False`` guard below fires before any data is
    produced (it is only skipped under ``python -O``).  It is left untouched;
    the remaining code presumably predates quadrature support -- confirm
    before removing the guard.

    :param block_count: number of blocks to produce data for.
    :returns: the result of ``make_superblocks``.
    :raises AssertionError: always, as long as the FIXME guard remains.
    """
    discr = self.discr
    given = self.plan.given

    fh_struct = flux_header_struct(given.float_type, discr.dimensions)
    fp_struct = face_pair_struct(given.float_type, discr.dimensions)

    min_headers = []
    min_fp_blocks = []

    from random import randrange, choice, gauss

    # NOTE(review): the sibling flux_face_data_block uses
    # ``plan.dofs_per_face`` without calling it -- verify this call.
    face_dofs = self.plan.dofs_per_face()

    mp_count = discr.device.get_attribute(
        cuda.device_attribute.MULTIPROCESSOR_COUNT)

    # FIXME
    assert False, "flux planning in the presence of quadrature needs to be fixed"

    pdata = self.plan.partition_data
    index_list_count = self.FAKE_INDEX_LIST_COUNT

    # All (microblock, element, face) slots of a block; the same for every
    # block, hence computed once instead of once per loop iteration.
    faces = [(mb_nr, mb_el_nr, face_nr)
             for mb_nr in range(self.plan.microblocks_per_block())
             for mb_el_nr in range(given.microblock.elements)
             for face_nr in range(self.plan.faces_per_el())]

    def bound_int(low, x, hi):
        # Clamp to [low, hi], then convert to int.
        return int(min(max(low, x), hi))

    def draw_dest():
        # Random face destination offset within a block.
        mb_nr, mb_el_nr, face_nr = choice(faces)
        return (mb_nr * given.aligned_face_dofs_per_microblock()
                + mb_el_nr * face_dofs * given.faces_per_el()
                + face_nr * face_dofs)

    for block_nr in range(mp_count):
        block_base = (block_nr * given.microblock.aligned_floats
                      * self.plan.microblocks_per_block())

        def draw_base():
            # Random element offset inside the current block.
            mb_nr, mb_el_nr, _ = choice(faces)
            return (block_base
                    + mb_nr * given.microblock.aligned_floats
                    + mb_el_nr * given.dofs_per_el())

        fp_count = bound_int(
            0,
            gauss(pdata.face_pair_avg,
                  (pdata.max_face_pair_count - pdata.face_pair_avg) / 2),
            pdata.max_face_pair_count)

        fp_structs = []
        for _ in range(fp_count):
            fp_structs.append(
                fp_struct.make(
                    h=0.5,
                    order=2,
                    face_jacobian=0.5,
                    normal=discr.dimensions * [0.1],
                    a_base=draw_base(),
                    b_base=draw_base(),
                    a_ilist_index=randrange(index_list_count) * face_dofs,
                    b_ilist_index=randrange(index_list_count) * face_dofs,
                    boundary_bitmap=1,
                    b_write_ilist_index=randrange(index_list_count)
                    * face_dofs,
                    a_dest=draw_dest(),
                    b_dest=draw_dest()))

        total_ext_face_count = bound_int(
            0, pdata.ext_face_avg + randrange(-1, 2), fp_count)
        bdry_count = min(
            total_ext_face_count,
            randrange(1 + int(round(total_ext_face_count / 6))))
        diff_count = total_ext_face_count - bdry_count

        # Header fields are cumulative end offsets (same, then diff, then
        # bdry), the layout flux_face_data_block writes.  Storing bare
        # counts for diff/bdry would make those ranges end before they start.
        same_facepairs_end = len(fp_structs) - total_ext_face_count
        diff_facepairs_end = same_facepairs_end + diff_count
        bdry_facepairs_end = diff_facepairs_end + bdry_count

        min_headers.append(
            fh_struct.make(
                same_facepairs_end=same_facepairs_end,
                diff_facepairs_end=diff_facepairs_end,
                bdry_facepairs_end=bdry_facepairs_end))
        min_fp_blocks.append(fp_structs)

    # Tile the per-multiprocessor data and cut it to the requested length.
    dups = block_count // mp_count + 1
    headers = (min_headers * dups)[:block_count]
    fp_blocks = (min_fp_blocks * dups)[:block_count]

    from cgen import Value
    from hedge.backends.cuda.tools import make_superblocks

    return make_superblocks(
        given.devdata, "flux_data",
        [(headers, Value(fh_struct.tpname, "header"))],
        [(fp_blocks, Value(fp_struct.tpname, "facepairs"))])
def flux_face_data_block(self, elgroup):
    """Assemble per-block flux headers and face-pair structs.

    Every (element, face) pair of every block in ``self.discr.blocks`` is
    visited once and turned into a face-pair struct.  Within a block the
    structs are grouped as: pairs whose two sides live in the same block,
    pairs whose opposite side lives in a different block, and boundary
    faces.  The block header stores the cumulative end offset of each group.

    :param elgroup: element group; only used to look up quadrature info when
        ``self.plan.quadrature_tag`` is not ``None``.
    :returns: the value returned by ``make_superblocks`` for the headers and
        face-pair blocks, with ``int_fp_count``, ``ext_fp_count``,
        ``bdry_fp_count`` and ``fp_count`` passed as extra fields.
    """
    from hedge.backends.cuda import GPUBoundaryFaceStorage

    discr = self.discr
    plan = self.plan
    given = plan.given

    # Stored as b_dest for boundary and cross-block pairs; == (1 << 16) - 1.
    INVALID_DEST = 0xFFFF

    fh_struct = flux_header_struct(given.float_type, discr.dimensions)
    fp_struct = face_pair_struct(given.float_type, discr.dimensions)

    def find_elface_dest(el_face):
        # Block-local destination offset of an (element, face number) pair.
        # dofs_per_elface / dofs_per_face are set per block in the loop below.
        mb_index, index_in_mb = divmod(
            discr.find_number_in_block(el_face[0]),
            given.microblock.elements)
        return (mb_index * plan.aligned_face_dofs_per_microblock()
                + index_in_mb * dofs_per_elface
                + el_face[1] * dofs_per_face)

    # {{{ quadrature setup, if necessary

    if plan.quadrature_tag is None:
        find_el_src_index = discr.find_el_gpu_index
        face_storage_map = discr.face_storage_info.map
    else:
        quad_info = discr.get_cuda_quadrature_info(plan.quadrature_tag)
        eg_quad_info = discr.get_cuda_elgroup_quadrature_info(
            elgroup, plan.quadrature_tag)
        ldis_quad_info = eg_quad_info.ldis_quad_info

        def find_el_src_index(el):
            # Source offset of *el* in the quadrature face data.
            owner = discr.blocks[discr.partition[el.id]]
            mb_nr, in_mb_nr = divmod(
                owner.el_number_map[el], given.microblock.elements)
            return (owner.number * plan.input_dofs_per_block()
                    + mb_nr * eg_quad_info.aligned_int_face_dofs_per_microblock
                    + in_mb_nr * ldis_quad_info.face_node_count()
                    * ldis_quad_info.ldis.face_count())

        face_storage_map = quad_info.face_storage_info.map

    # }}}

    headers = []
    fp_blocks = []
    interior_total = 0
    exterior_total = 0
    boundary_total = 0

    for block in discr.blocks:
        ldis = block.local_discretization
        dofs_per_face = plan.dofs_per_face
        dofs_per_elface = dofs_per_face * ldis.face_count()

        pending = set(
            (el, face_nr)
            for mb in block.microblocks
            for el in mb
            for face_nr in range(ldis.face_count()))

        same_block_pairs = []
        cross_block_pairs = []
        boundary_pairs = []

        while pending:
            side_a = face_storage_map[pending.pop()]
            side_b = side_a.opposite

            # Values for one-sided pairs; only a same-block pair overrides
            # the write index list and the B destination.
            bitmap = 0
            b_write_list_id = 0
            b_dest = INVALID_DEST

            if isinstance(side_b, GPUBoundaryFaceStorage):
                # boundary face
                b_base = side_b.gpu_bdry_index_in_floats
                bitmap = self.executor.elface_to_bdry_bitmap.get(
                    side_a.el_face, 0)
                bucket = boundary_pairs
                boundary_total += 1
            else:
                # interior face
                b_base = find_el_src_index(side_b.el_face[0])
                if side_b.native_block == side_a.native_block:
                    # Both sides are in this block: one struct covers both,
                    # so the opposite face must not be visited again.
                    pending.remove(side_b.el_face)
                    b_write_list_id = side_a.ext_write_index_list_id
                    b_dest = find_elface_dest(side_b.el_face)
                    bucket = same_block_pairs
                    interior_total += 1
                else:
                    bucket = cross_block_pairs
                    exterior_total += 1

            a_base = find_el_src_index(side_a.el_face[0])
            a_dest = find_elface_dest(side_a.el_face)

            bucket.append(fp_struct.make(
                h=side_a.face_pair_side.h,
                order=side_a.face_pair_side.order,
                face_jacobian=side_a.face_pair_side.face_jacobian,
                normal=side_a.face_pair_side.normal,
                a_base=a_base,
                b_base=b_base,
                a_ilist_index=(
                    side_a.global_int_flux_index_list_id * dofs_per_face),
                b_ilist_index=(
                    side_a.global_ext_flux_index_list_id * dofs_per_face),
                boundary_bitmap=bitmap,
                b_write_ilist_index=b_write_list_id * dofs_per_face,
                a_dest=a_dest,
                b_dest=b_dest))

        # Cumulative end offsets of the three groups in the concatenated list.
        same_end = len(same_block_pairs)
        diff_end = same_end + len(cross_block_pairs)
        bdry_end = diff_end + len(boundary_pairs)

        headers.append(fh_struct.make(
            same_facepairs_end=same_end,
            diff_facepairs_end=diff_end,
            bdry_facepairs_end=bdry_end))
        fp_blocks.append(
            same_block_pairs + cross_block_pairs + boundary_pairs)

    from cgen import Value
    from hedge.backends.cuda.tools import make_superblocks

    return make_superblocks(
        given.devdata, "flux_data",
        [(headers, Value(fh_struct.tpname, "header"))],
        [(fp_blocks, Value(fp_struct.tpname, "facepairs"))],
        extra_fields={
            "int_fp_count": interior_total,
            "ext_fp_count": exterior_total,
            "bdry_fp_count": boundary_total,
            "fp_count": interior_total + exterior_total + boundary_total,
        })
def flux_face_data_block(self, elgroup):
    """Build the flux header and face-pair data for all blocks.

    Each block of ``self.discr.blocks`` contributes one header and one list
    of face-pair structs.  The list is ordered as: pairs with both sides in
    the block, pairs whose opposite side is in another block, boundary
    faces.  The header records where each of these three runs ends.

    :param elgroup: element group, consulted for quadrature info only if
        ``self.plan.quadrature_tag`` is not ``None``.
    :returns: the result of ``make_superblocks``, which also receives the
        totals ``int_fp_count``, ``ext_fp_count``, ``bdry_fp_count`` and
        ``fp_count`` as extra fields.
    """
    fplan = self.plan
    discr = self.discr
    given = fplan.given

    # b_dest value for pairs whose B side is not written; == 0xFFFF.
    INVALID_DEST = (1 << 16) - 1

    from hedge.backends.cuda import GPUBoundaryFaceStorage

    fh_struct = flux_header_struct(given.float_type, discr.dimensions)
    fp_struct = face_pair_struct(given.float_type, discr.dimensions)

    def find_elface_dest(el_face):
        # Destination offset of (element, face number) inside its block.
        # face_dofs and elface_dofs are rebound for each block further down.
        element, face_number = el_face[0], el_face[1]
        mb_index, index_in_mb = divmod(
            discr.find_number_in_block(element), given.microblock.elements)
        microblock_part = mb_index * fplan.aligned_face_dofs_per_microblock()
        element_part = index_in_mb * elface_dofs
        return microblock_part + element_part + face_number * face_dofs

    # {{{ quadrature setup, if necessary

    use_quadrature = fplan.quadrature_tag is not None
    if use_quadrature:
        quad_info = discr.get_cuda_quadrature_info(fplan.quadrature_tag)
        eg_quad_info = discr.get_cuda_elgroup_quadrature_info(
            elgroup, fplan.quadrature_tag)
        ldis_quad_info = eg_quad_info.ldis_quad_info

        def find_el_src_index(el):
            # Offset of *el* within the quadrature face data.
            el_block = discr.blocks[discr.partition[el.id]]
            mb_nr, in_mb_nr = divmod(
                el_block.el_number_map[el], given.microblock.elements)
            return (
                el_block.number * fplan.input_dofs_per_block()
                + mb_nr * eg_quad_info.aligned_int_face_dofs_per_microblock
                + in_mb_nr * ldis_quad_info.face_node_count()
                * ldis_quad_info.ldis.face_count())

        face_storage_map = quad_info.face_storage_info.map
    else:
        find_el_src_index = discr.find_el_gpu_index
        face_storage_map = discr.face_storage_info.map

    # }}}

    headers, fp_blocks = [], []
    int_fp_count = ext_fp_count = bdry_fp_count = 0

    for block in discr.blocks:
        ldis = block.local_discretization
        face_dofs = fplan.dofs_per_face
        elface_dofs = face_dofs * ldis.face_count()

        unvisited = set(
            (el, face_nbr)
            for mb in block.microblocks
            for el in mb
            for face_nbr in range(ldis.face_count()))

        # One list per group, concatenated in this order at the end.
        same_group, diff_group, bdry_group = [], [], []

        while unvisited:
            a_face = face_storage_map[unvisited.pop()]
            b_face = a_face.opposite

            if isinstance(b_face, GPUBoundaryFaceStorage):
                # boundary face
                b_base = b_face.gpu_bdry_index_in_floats
                boundary_bitmap = self.executor.elface_to_bdry_bitmap.get(
                    a_face.el_face, 0)
                write_list_id, b_dest = 0, INVALID_DEST
                group = bdry_group
                bdry_fp_count += 1
            else:
                # interior face
                b_base = find_el_src_index(b_face.el_face[0])
                boundary_bitmap = 0

                if b_face.native_block != a_face.native_block:
                    # Opposite side belongs to another block.
                    write_list_id, b_dest = 0, INVALID_DEST
                    group = diff_group
                    ext_fp_count += 1
                else:
                    # Opposite side is in this block too; it is handled by
                    # this struct, so drop it from the work set.
                    unvisited.remove(b_face.el_face)
                    write_list_id = a_face.ext_write_index_list_id
                    b_dest = find_elface_dest(b_face.el_face)
                    group = same_group
                    int_fp_count += 1

            a_base = find_el_src_index(a_face.el_face[0])
            a_dest = find_elface_dest(a_face.el_face)

            pair = fp_struct.make(
                h=a_face.face_pair_side.h,
                order=a_face.face_pair_side.order,
                face_jacobian=a_face.face_pair_side.face_jacobian,
                normal=a_face.face_pair_side.normal,
                a_base=a_base,
                b_base=b_base,
                a_ilist_index=a_face.global_int_flux_index_list_id * face_dofs,
                b_ilist_index=a_face.global_ext_flux_index_list_id * face_dofs,
                boundary_bitmap=boundary_bitmap,
                b_write_ilist_index=write_list_id * face_dofs,
                a_dest=a_dest,
                b_dest=b_dest)
            group.append(pair)

        # Running totals give the end offset of each group.
        end_of_same = len(same_group)
        end_of_diff = end_of_same + len(diff_group)
        end_of_bdry = end_of_diff + len(bdry_group)

        header = fh_struct.make(
            same_facepairs_end=end_of_same,
            diff_facepairs_end=end_of_diff,
            bdry_facepairs_end=end_of_bdry)
        headers.append(header)
        fp_blocks.append(same_group + diff_group + bdry_group)

    from cgen import Value
    from hedge.backends.cuda.tools import make_superblocks

    total_fp_count = int_fp_count + ext_fp_count + bdry_fp_count
    extra_fields = {
        "int_fp_count": int_fp_count,
        "ext_fp_count": ext_fp_count,
        "bdry_fp_count": bdry_fp_count,
        "fp_count": total_fp_count,
    }

    return make_superblocks(
        given.devdata, "flux_data",
        [(headers, Value(fh_struct.tpname, "header"))],
        [(fp_blocks, Value(fp_struct.tpname, "facepairs"))],
        extra_fields=extra_fields)