Example #1
0
    def fake_flux_face_data_block(self, block_count):
        """Build randomized stand-in flux face data for *block_count* blocks.

        One header and one list of face-pair structs is generated per
        multiprocessor reported by ``self.discr.device``.  That set is then
        repeated and truncated so that exactly *block_count* entries are
        handed to ``make_superblocks``.

        The header fields are written as cumulative end indices
        (same-block pairs, then different-block pairs, then boundary
        pairs), matching how ``flux_face_data_block`` fills the same
        struct.

        NOTE: this method is currently disabled by the unconditional
        ``assert`` below (see the FIXME); everything after it only runs
        when assertions are stripped (``python -O``).

        :arg block_count: number of blocks to generate data for.
        :returns: the value returned by ``make_superblocks``.
        """
        discr = self.discr
        given = self.plan.given

        fh_struct = flux_header_struct(given.float_type, discr.dimensions)
        fp_struct = face_pair_struct(given.float_type, discr.dimensions)

        min_headers = []
        min_fp_blocks = []

        from random import randrange, choice, gauss

        face_dofs = self.plan.dofs_per_face()

        mp_count = discr.device.get_attribute(
                    cuda.device_attribute.MULTIPROCESSOR_COUNT)

        # FIXME
        assert False, "flux planning in the presence of quadrature needs to be fixed"

        # Everything below is independent of the block being generated,
        # so it is set up once instead of once per block.
        pdata = self.plan.partition_data
        mbs_per_block = self.plan.microblocks_per_block()
        mb_floats = given.microblock.aligned_floats

        faces = [(mb_nr, mb_el_nr, face_nr)
                for mb_nr in range(mbs_per_block)
                for mb_el_nr in range(given.microblock.elements)
                for face_nr in range(self.plan.faces_per_el())]

        def bound_int(low, x, hi):
            # clamp x into [low, hi] and truncate to an integer
            return int(min(max(low, x), hi))

        def draw_base(block_nr):
            # offset of a randomly chosen element inside block *block_nr*
            mb_nr, mb_el_nr, _ = choice(faces)
            return (block_nr * mb_floats * mbs_per_block
                    + mb_nr * mb_floats
                    + mb_el_nr * given.dofs_per_el())

        def draw_dest():
            # offset of a randomly chosen face
            mb_nr, mb_el_nr, face_nr = choice(faces)
            return (mb_nr * given.aligned_face_dofs_per_microblock()
                    + mb_el_nr * face_dofs * given.faces_per_el()
                    + face_nr * face_dofs)

        def draw_ilist_index():
            return randrange(self.FAKE_INDEX_LIST_COUNT) * face_dofs

        for block_nr in range(mp_count):
            fp_count = bound_int(
                    0,
                    gauss(
                        pdata.face_pair_avg,
                        (pdata.max_face_pair_count-pdata.face_pair_avg)/2),
                    pdata.max_face_pair_count)

            fp_structs = []
            for _i in range(fp_count):
                # keyword arguments are evaluated in order, so the sequence
                # of random draws is base, base, ilist x3, dest, dest
                fp_structs.append(
                        fp_struct.make(
                            h=0.5, order=2, face_jacobian=0.5,
                            normal=discr.dimensions*[0.1],

                            a_base=draw_base(block_nr),
                            b_base=draw_base(block_nr),

                            a_ilist_index=draw_ilist_index(),
                            b_ilist_index=draw_ilist_index(),

                            boundary_bitmap=1,
                            b_write_ilist_index=draw_ilist_index(),

                            a_dest=draw_dest(), b_dest=draw_dest()
                            ))

            total_ext_face_count = bound_int(0,
                pdata.ext_face_avg + randrange(-1, 2),
                fp_count)

            bdry_count = min(total_ext_face_count,
                    randrange(1+int(round(total_ext_face_count/6))))
            diff_count = total_ext_face_count-bdry_count

            # The *_end fields are cumulative end indices into the face
            # pair list: [0, same_end) same-block, [same_end, diff_end)
            # different-block, [diff_end, bdry_end) boundary.
            same_end = len(fp_structs)-total_ext_face_count
            diff_end = same_end + diff_count
            bdry_end = diff_end + bdry_count

            min_headers.append(fh_struct.make(
                    same_facepairs_end=same_end,
                    diff_facepairs_end=diff_end,
                    bdry_facepairs_end=bdry_end))
            min_fp_blocks.append(fp_structs)

        # repeat the per-multiprocessor data often enough to cover
        # block_count entries, then cut off the surplus
        dups = block_count//mp_count + 1
        headers = (min_headers * dups)[:block_count]
        fp_blocks = (min_fp_blocks * dups)[:block_count]

        from cgen import Value
        from hedge.backends.cuda.tools import make_superblocks

        return make_superblocks(
                given.devdata, "flux_data",
                [(headers, Value(fh_struct.tpname, "header")) ],
                [(fp_blocks, Value(fp_struct.tpname, "facepairs"))]
                )
Example #2
0
    def fake_flux_face_data_block(self, block_count):
        """Build randomized stand-in flux face data for *block_count* blocks.

        One header and one list of face-pair structs is generated per
        multiprocessor reported by ``self.discr.device``.  That set is then
        repeated and truncated so that exactly *block_count* entries are
        handed to ``make_superblocks``.

        The header fields are written as cumulative end indices
        (same-block pairs, then different-block pairs, then boundary
        pairs), matching how ``flux_face_data_block`` fills the same
        struct.

        NOTE: this method is currently disabled by the unconditional
        ``assert`` below (see the FIXME); everything after it only runs
        when assertions are stripped (``python -O``).

        :arg block_count: number of blocks to generate data for.
        :returns: the value returned by ``make_superblocks``.
        """
        discr = self.discr
        given = self.plan.given

        fh_struct = flux_header_struct(given.float_type, discr.dimensions)
        fp_struct = face_pair_struct(given.float_type, discr.dimensions)

        min_headers = []
        min_fp_blocks = []

        from random import randrange, choice, gauss

        face_dofs = self.plan.dofs_per_face()

        mp_count = discr.device.get_attribute(
            cuda.device_attribute.MULTIPROCESSOR_COUNT)

        # FIXME
        assert False, "flux planning in the presence of quadrature needs to be fixed"

        # Everything below is independent of the block being generated,
        # so it is set up once instead of once per block.
        pdata = self.plan.partition_data
        mbs_per_block = self.plan.microblocks_per_block()
        mb_floats = given.microblock.aligned_floats

        faces = [(mb_nr, mb_el_nr, face_nr)
                 for mb_nr in range(mbs_per_block)
                 for mb_el_nr in range(given.microblock.elements)
                 for face_nr in range(self.plan.faces_per_el())]

        def bound_int(low, x, hi):
            # clamp x into [low, hi] and truncate to an integer
            return int(min(max(low, x), hi))

        def draw_base(block_nr):
            # offset of a randomly chosen element inside block *block_nr*
            mb_nr, mb_el_nr, _ = choice(faces)
            return (block_nr * mb_floats * mbs_per_block +
                    mb_nr * mb_floats +
                    mb_el_nr * given.dofs_per_el())

        def draw_dest():
            # offset of a randomly chosen face
            mb_nr, mb_el_nr, face_nr = choice(faces)
            return (mb_nr * given.aligned_face_dofs_per_microblock() +
                    mb_el_nr * face_dofs * given.faces_per_el() +
                    face_nr * face_dofs)

        def draw_ilist_index():
            return randrange(self.FAKE_INDEX_LIST_COUNT) * face_dofs

        for block_nr in range(mp_count):
            fp_count = bound_int(
                0,
                gauss(pdata.face_pair_avg,
                      (pdata.max_face_pair_count - pdata.face_pair_avg) / 2),
                pdata.max_face_pair_count)

            fp_structs = []
            for _i in range(fp_count):
                # keyword arguments are evaluated in order, so the sequence
                # of random draws is base, base, ilist x3, dest, dest
                fp_structs.append(
                    fp_struct.make(
                        h=0.5,
                        order=2,
                        face_jacobian=0.5,
                        normal=discr.dimensions * [0.1],
                        a_base=draw_base(block_nr),
                        b_base=draw_base(block_nr),
                        a_ilist_index=draw_ilist_index(),
                        b_ilist_index=draw_ilist_index(),
                        boundary_bitmap=1,
                        b_write_ilist_index=draw_ilist_index(),
                        a_dest=draw_dest(),
                        b_dest=draw_dest()))

            total_ext_face_count = bound_int(
                0, pdata.ext_face_avg + randrange(-1, 2), fp_count)

            bdry_count = min(
                total_ext_face_count,
                randrange(1 + int(round(total_ext_face_count / 6))))
            diff_count = total_ext_face_count - bdry_count

            # The *_end fields are cumulative end indices into the face
            # pair list: [0, same_end) same-block, [same_end, diff_end)
            # different-block, [diff_end, bdry_end) boundary.
            same_end = len(fp_structs) - total_ext_face_count
            diff_end = same_end + diff_count
            bdry_end = diff_end + bdry_count

            min_headers.append(
                fh_struct.make(same_facepairs_end=same_end,
                               diff_facepairs_end=diff_end,
                               bdry_facepairs_end=bdry_end))
            min_fp_blocks.append(fp_structs)

        # repeat the per-multiprocessor data often enough to cover
        # block_count entries, then cut off the surplus
        dups = block_count // mp_count + 1
        headers = (min_headers * dups)[:block_count]
        fp_blocks = (min_fp_blocks * dups)[:block_count]

        from cgen import Value
        from hedge.backends.cuda.tools import make_superblocks

        return make_superblocks(
            given.devdata, "flux_data",
            [(headers, Value(fh_struct.tpname, "header"))],
            [(fp_blocks, Value(fp_struct.tpname, "facepairs"))])
Example #3
0
    def flux_face_data_block(self, elgroup):
        """Assemble the per-block flux headers and face-pair records.

        For every block in ``self.discr.blocks``, each (element, face)
        combination is turned into a face-pair struct and sorted into one
        of three groups: pairs whose opposite face lives in the same block,
        pairs whose opposite face lives in a different block, and boundary
        faces.  The groups are stored back to back in that order, and the
        block's header records the cumulative end index of each group.

        :arg elgroup: only used to look up element-group quadrature
            information when ``self.plan.quadrature_tag`` is not ``None``.
        :returns: the value returned by ``make_superblocks``, which is
            also given the interior/exterior/boundary face-pair totals
            as ``extra_fields``.
        """
        discr = self.discr
        plan = self.plan
        given = plan.given

        block_headers = []
        block_face_pairs = []

        # Stored as b_dest whenever the "b" side is not written to from
        # this block (boundary faces and faces owned by another block).
        no_dest = (1 << 16) - 1

        from hedge.backends.cuda import GPUBoundaryFaceStorage

        fh_struct = flux_header_struct(given.float_type, discr.dimensions)
        fp_struct = face_pair_struct(given.float_type, discr.dimensions)

        def find_elface_dest(el_face):
            # NOTE: face_dofs and elface_dofs are bound per block in the
            # main loop below; this helper is only called from there.
            number_in_block = discr.find_number_in_block(el_face[0])
            mb_index, index_in_mb = divmod(
                    number_in_block, given.microblock.elements)

            mb_offset = mb_index * plan.aligned_face_dofs_per_microblock()
            el_offset = index_in_mb * elface_dofs
            face_offset = el_face[1] * face_dofs
            return mb_offset + el_offset + face_offset

        # {{{ pick source-index lookup and face storage map

        if plan.quadrature_tag is None:
            find_el_src_index = discr.find_el_gpu_index
            face_storage_map = discr.face_storage_info.map
        else:
            quad_info = discr.get_cuda_quadrature_info(plan.quadrature_tag)
            eg_quad_info = discr.get_cuda_elgroup_quadrature_info(
                    elgroup, plan.quadrature_tag)
            ldis_quad_info = eg_quad_info.ldis_quad_info

            def find_el_src_index(el):
                owner = discr.blocks[discr.partition[el.id]]
                mb_nr, in_mb_nr = divmod(
                        owner.el_number_map[el], given.microblock.elements)

                block_offset = owner.number * plan.input_dofs_per_block()
                mb_offset = (mb_nr
                        * eg_quad_info.aligned_int_face_dofs_per_microblock)
                el_offset = (in_mb_nr
                        * ldis_quad_info.face_node_count()
                        * ldis_quad_info.ldis.face_count())
                return block_offset + mb_offset + el_offset

            face_storage_map = quad_info.face_storage_info.map

        # }}}

        int_fp_count = ext_fp_count = bdry_fp_count = 0

        for block in discr.blocks:
            ldis = block.local_discretization
            face_dofs = plan.dofs_per_face
            elface_dofs = face_dofs*ldis.face_count()

            faces_todo = set(
                    (el, face_nr)
                    for mb in block.microblocks
                    for el in mb
                    for face_nr in range(ldis.face_count()))

            same_block_pairs = []
            other_block_pairs = []
            boundary_pairs = []

            while faces_todo:
                a_face = face_storage_map[faces_todo.pop()]
                b_face = a_face.opposite

                if not isinstance(b_face, GPUBoundaryFaceStorage):
                    # interior face
                    b_base = find_el_src_index(b_face.el_face[0])
                    boundary_bitmap = 0

                    if b_face.native_block == a_face.native_block:
                        # Both sides are in this block: this record covers
                        # the opposite face too, so take it off the list.
                        faces_todo.remove(b_face.el_face)
                        b_write_index_list = a_face.ext_write_index_list_id
                        b_dest = find_elface_dest(b_face.el_face)
                        target = same_block_pairs
                    else:
                        # opposite side belongs to a different block
                        b_write_index_list = 0  # doesn't matter
                        b_dest = no_dest
                        target = other_block_pairs
                else:
                    # boundary face
                    b_base = b_face.gpu_bdry_index_in_floats
                    boundary_bitmap = self.executor.elface_to_bdry_bitmap.get(
                            a_face.el_face, 0)
                    b_write_index_list = 0  # doesn't matter
                    b_dest = no_dest
                    target = boundary_pairs

                a_base = find_el_src_index(a_face.el_face[0])
                a_dest = find_elface_dest(a_face.el_face)

                side = a_face.face_pair_side
                record = fp_struct.make(
                        h=side.h,
                        order=side.order,
                        face_jacobian=side.face_jacobian,
                        normal=side.normal,

                        a_base=a_base,
                        b_base=b_base,

                        a_ilist_index=(
                            a_face.global_int_flux_index_list_id*face_dofs),
                        b_ilist_index=(
                            a_face.global_ext_flux_index_list_id*face_dofs),

                        boundary_bitmap=boundary_bitmap,
                        b_write_ilist_index=b_write_index_list*face_dofs,

                        a_dest=a_dest,
                        b_dest=b_dest)
                target.append(record)

            # cumulative end indices of the three groups
            same_end = len(same_block_pairs)
            diff_end = same_end + len(other_block_pairs)
            bdry_end = diff_end + len(boundary_pairs)

            int_fp_count += len(same_block_pairs)
            ext_fp_count += len(other_block_pairs)
            bdry_fp_count += len(boundary_pairs)

            block_headers.append(fh_struct.make(
                    same_facepairs_end=same_end,
                    diff_facepairs_end=diff_end,
                    bdry_facepairs_end=bdry_end,
                    ))
            block_face_pairs.append(
                    same_block_pairs + other_block_pairs + boundary_pairs)

        from cgen import Value
        from hedge.backends.cuda.tools import make_superblocks

        fp_totals = {
                "int_fp_count": int_fp_count,
                "ext_fp_count": ext_fp_count,
                "bdry_fp_count": bdry_fp_count,
                "fp_count": int_fp_count+ext_fp_count+bdry_fp_count,
                }

        return make_superblocks(
                given.devdata, "flux_data",
                [(block_headers, Value(fh_struct.tpname, "header"))],
                [(block_face_pairs, Value(fp_struct.tpname, "facepairs"))],
                extra_fields=fp_totals)
Example #4
0
    def flux_face_data_block(self, elgroup):
        """Assemble the per-block flux headers and face-pair records.

        For every block in ``self.discr.blocks``, each (element, face)
        combination is turned into a face-pair struct and sorted into one
        of three groups: pairs whose opposite face lives in the same block,
        pairs whose opposite face lives in a different block, and boundary
        faces.  The groups are stored back to back in that order, and the
        block's header records the cumulative end index of each group.

        :arg elgroup: only used to look up element-group quadrature
            information when ``self.plan.quadrature_tag`` is not ``None``.
        :returns: the value returned by ``make_superblocks``, which is
            also given the interior/exterior/boundary face-pair totals
            as ``extra_fields``.
        """
        discr = self.discr
        plan = self.plan
        given = plan.given

        block_headers = []
        block_face_pairs = []

        # Stored as b_dest whenever the "b" side is not written to from
        # this block (boundary faces and faces owned by another block).
        no_dest = (1 << 16) - 1

        from hedge.backends.cuda import GPUBoundaryFaceStorage

        fh_struct = flux_header_struct(given.float_type, discr.dimensions)
        fp_struct = face_pair_struct(given.float_type, discr.dimensions)

        def find_elface_dest(el_face):
            # NOTE: face_dofs and elface_dofs are bound per block in the
            # main loop below; this helper is only called from there.
            number_in_block = discr.find_number_in_block(el_face[0])
            mb_index, index_in_mb = divmod(number_in_block,
                                           given.microblock.elements)

            mb_offset = mb_index * plan.aligned_face_dofs_per_microblock()
            el_offset = index_in_mb * elface_dofs
            face_offset = el_face[1] * face_dofs
            return mb_offset + el_offset + face_offset

        # {{{ pick source-index lookup and face storage map

        if plan.quadrature_tag is None:
            find_el_src_index = discr.find_el_gpu_index
            face_storage_map = discr.face_storage_info.map
        else:
            quad_info = discr.get_cuda_quadrature_info(plan.quadrature_tag)
            eg_quad_info = discr.get_cuda_elgroup_quadrature_info(
                elgroup, plan.quadrature_tag)
            ldis_quad_info = eg_quad_info.ldis_quad_info

            def find_el_src_index(el):
                owner = discr.blocks[discr.partition[el.id]]
                mb_nr, in_mb_nr = divmod(owner.el_number_map[el],
                                         given.microblock.elements)

                block_offset = owner.number * plan.input_dofs_per_block()
                mb_offset = (
                    mb_nr * eg_quad_info.aligned_int_face_dofs_per_microblock)
                el_offset = (in_mb_nr * ldis_quad_info.face_node_count() *
                             ldis_quad_info.ldis.face_count())
                return block_offset + mb_offset + el_offset

            face_storage_map = quad_info.face_storage_info.map

        # }}}

        int_fp_count = ext_fp_count = bdry_fp_count = 0

        for block in discr.blocks:
            ldis = block.local_discretization
            face_dofs = plan.dofs_per_face
            elface_dofs = face_dofs * ldis.face_count()

            faces_todo = set((el, face_nr) for mb in block.microblocks
                             for el in mb
                             for face_nr in range(ldis.face_count()))

            same_block_pairs = []
            other_block_pairs = []
            boundary_pairs = []

            while faces_todo:
                a_face = face_storage_map[faces_todo.pop()]
                b_face = a_face.opposite

                if not isinstance(b_face, GPUBoundaryFaceStorage):
                    # interior face
                    b_base = find_el_src_index(b_face.el_face[0])
                    boundary_bitmap = 0

                    if b_face.native_block == a_face.native_block:
                        # Both sides are in this block: this record covers
                        # the opposite face too, so take it off the list.
                        faces_todo.remove(b_face.el_face)
                        b_write_index_list = a_face.ext_write_index_list_id
                        b_dest = find_elface_dest(b_face.el_face)
                        target = same_block_pairs
                    else:
                        # opposite side belongs to a different block
                        b_write_index_list = 0  # doesn't matter
                        b_dest = no_dest
                        target = other_block_pairs
                else:
                    # boundary face
                    b_base = b_face.gpu_bdry_index_in_floats
                    boundary_bitmap = self.executor.elface_to_bdry_bitmap.get(
                        a_face.el_face, 0)
                    b_write_index_list = 0  # doesn't matter
                    b_dest = no_dest
                    target = boundary_pairs

                a_base = find_el_src_index(a_face.el_face[0])
                a_dest = find_elface_dest(a_face.el_face)

                side = a_face.face_pair_side
                record = fp_struct.make(
                    h=side.h,
                    order=side.order,
                    face_jacobian=side.face_jacobian,
                    normal=side.normal,
                    a_base=a_base,
                    b_base=b_base,
                    a_ilist_index=(
                        a_face.global_int_flux_index_list_id * face_dofs),
                    b_ilist_index=(
                        a_face.global_ext_flux_index_list_id * face_dofs),
                    boundary_bitmap=boundary_bitmap,
                    b_write_ilist_index=b_write_index_list * face_dofs,
                    a_dest=a_dest,
                    b_dest=b_dest)
                target.append(record)

            # cumulative end indices of the three groups
            same_end = len(same_block_pairs)
            diff_end = same_end + len(other_block_pairs)
            bdry_end = diff_end + len(boundary_pairs)

            int_fp_count += len(same_block_pairs)
            ext_fp_count += len(other_block_pairs)
            bdry_fp_count += len(boundary_pairs)

            block_headers.append(
                fh_struct.make(
                    same_facepairs_end=same_end,
                    diff_facepairs_end=diff_end,
                    bdry_facepairs_end=bdry_end,
                ))
            block_face_pairs.append(same_block_pairs + other_block_pairs +
                                    boundary_pairs)

        from cgen import Value
        from hedge.backends.cuda.tools import make_superblocks

        fp_totals = {
            "int_fp_count": int_fp_count,
            "ext_fp_count": ext_fp_count,
            "bdry_fp_count": bdry_fp_count,
            "fp_count": int_fp_count + ext_fp_count + bdry_fp_count,
        }

        return make_superblocks(
            given.devdata,
            "flux_data", [(block_headers, Value(fh_struct.tpname, "header"))],
            [(block_face_pairs, Value(fp_struct.tpname, "facepairs"))],
            extra_fields=fp_totals)