예제 #1
0
 def get_direct_tex_mat_mul_code():
     """Build the node list for the directly texture-fed matrix multiply.

     The result starts with one ``fof<inl>`` declaration per inlined
     vector plus a declaration of ``lm``.  ``unroll`` then expands, for
     each column index ``j`` it supplies, the ``fp_tex1Dfetch`` reads into
     ``fof<inl>``, the load of ``smem_matrix[...]`` into ``lm`` and the
     ``result<inl> += fof<inl>*lm`` accumulations.

     Returns the concatenated list of nodes, terminated by ``Line()``.
     """
     inline_indices = range(par.inline)

     def column_statements(j):
         # *j* is inserted with %s, so it may be a number or an expression.
         fetches = [
             Assign(
                 "fof%d" % inl,
                 "fp_tex1Dfetch(in_vector_tex, "
                 "GLOBAL_MB_PREIMG_DOF_BASE"
                 " + %(inl)d * ALIGNED_PREIMAGE_DOFS_PER_MB"
                 " + mb_el*PREIMAGE_DOFS_PER_EL+%(j)s)"
                 % {"j": j, "inl": inl, "row": "SEGMENT_DOF"})
             for inl in inline_indices
         ]
         matrix_load = Assign(
             "lm",
             "smem_matrix["
             "%(row)s*MATRIX_COLUMNS + %(j)s]"
             % {"j": j, "row": "SEGMENT_DOF"})
         accumulations = [
             S("result%(inl)d += fof%(inl)d*lm" % {"inl": inl})
             for inl in inline_indices
         ]
         return fetches + [matrix_load] + accumulations

     declarations = [POD(float_type, "fof%d" % inl) for inl in inline_indices]
     declarations += [POD(float_type, "lm"), Line()]

     unrolled = unroll(
         column_statements,
         total_number=self.plan.preimage_dofs_per_el,
         max_unroll=self.plan.max_unroll)

     return declarations + unrolled + [Line()]
예제 #2
0
 def get_direct_tex_mat_mul_code():
     """Return the nodes of the matrix multiply that reads via texture.

     Layout of the returned list:

     * declarations of ``fof<inl>`` (one per ``range(par.inline)``) and
       of ``lm``, followed by ``Line()``;
     * whatever ``unroll`` produces from the per-column statements
       (fetch into ``fof<inl>``, load ``lm``, accumulate ``result<inl>``);
     * a closing ``Line()``.
     """
     # Templates are filled in per column / per inlined vector below.
     fetch_template = (
         "fp_tex1Dfetch(in_vector_tex, "
         "GLOBAL_MB_PREIMG_DOF_BASE"
         " + %(inl)d * ALIGNED_PREIMAGE_DOFS_PER_MB"
         " + mb_el*PREIMAGE_DOFS_PER_EL+%(j)s)")
     matrix_template = (
         "smem_matrix["
         "%(row)s*MATRIX_COLUMNS + %(j)s]")

     def per_column(j):
         stmts = []
         for inl in range(par.inline):
             stmts.append(Assign(
                 "fof%d" % inl,
                 fetch_template % {"j": j, "inl": inl, "row": "SEGMENT_DOF"}))

         stmts.append(Assign(
             "lm", matrix_template % {"j": j, "row": "SEGMENT_DOF"}))

         for inl in range(par.inline):
             stmts.append(S("result%(inl)d += fof%(inl)d*lm" % {"inl": inl}))
         return stmts

     prologue = []
     for inl in range(par.inline):
         prologue.append(POD(float_type, "fof%d" % inl))
     prologue.append(POD(float_type, "lm"))
     prologue.append(Line())

     return prologue + unroll(
         per_column,
         total_number=self.plan.preimage_dofs_per_el,
         max_unroll=self.plan.max_unroll) + [Line()]
예제 #3
0
        def get_scalar_diff_code():
            """Build the node list for the scalar differentiation code.

            The generated code zero-initializes one ``d<inl>rst<axis>``
            accumulator for every inlined field and every axis in ``dims``.
            For each DOF index supplied by ``unroll`` it fetches
            ``field_value<inl>`` through ``fp_tex1Dfetch(field_tex, ...)`` and
            adds its product with the matching ``smem_diff_rst_mat`` entry to
            every accumulator.  Finally the accumulators are assigned to
            ``drst<axis>_global[...]`` under the guard
            ``MB_DOF < DOFS_PER_EL*ELS_PER_MB``.

            Returns:
                The list of generated nodes, in emission order.
            """
            from hedge.backends.cuda.tools import unroll

            code = [
                Initializer(POD(float_type, "d%drst%d" % (inl, axis)), 0)
                for inl in range(par.inline)
                for axis in dims
            ]
            code.append(Line())

            def get_mat_entry(row, col, axis):
                """Return the index expression of one differentiation matrix entry."""
                return ("smem_diff_rst_mat["
                        "%(row)s*DIFFMAT_COLUMNS + %(axis)s*DOFS_PER_EL"
                        " + %(col)s"
                        "]" % {"row": row, "col": col, "axis": axis})

            def unroll_body(j):
                """Return the statements generated for DOF index *j*."""
                fetches = [
                    Assign(
                        "field_value%d" % inl,
                        "fp_tex1Dfetch(field_tex, GLOBAL_MB_DOF_BASE + %d*ALIGNED_DOFS_PER_MB "
                        "+ mb_el*DOFS_PER_EL + %s)" % (inl, j))
                    for inl in range(par.inline)
                ]
                # Axis is the outer loop so statements stay grouped per axis.
                updates = [
                    S("d%drst%d += %s * field_value%d"
                      % (inl, axis, get_mat_entry("SEGMENT_DOF", j, axis), inl))
                    for axis in dims
                    for inl in range(par.inline)
                ]
                return fetches + [Line()] + updates + [Line()]

            code.extend(
                [POD(float_type, "field_value%d" % inl)
                 for inl in range(par.inline)]
                + [Line()]
                + unroll(unroll_body, given.dofs_per_el(), self.plan.max_unroll))

            store_code = Block()
            for inl in range(par.inline):
                for rst_axis in dims:
                    store_code.append(
                        Assign(
                            "drst%d_global[GLOBAL_MB_DOF_BASE"
                            " + %d*ALIGNED_DOFS_PER_MB + MB_DOF]"
                            % (rst_axis, inl),
                            "d%drst%d" % (inl, rst_axis),
                        ))

            code.append(If("MB_DOF < DOFS_PER_EL*ELS_PER_MB", store_code))

            return code
예제 #4
0
        def get_scalar_diff_code():
            """Return the nodes that differentiate the inlined scalar fields.

            Emission order of the returned list:

            1. ``d<inl>rst<axis>`` accumulators, each initialized to 0.
            2. ``field_value<inl>`` declarations.
            3. The output of ``unroll``: per DOF index, the
               ``fp_tex1Dfetch(field_tex, ...)`` reads followed by the
               ``d<inl>rst<axis> += <matrix entry> * field_value<inl>``
               updates.
            4. An ``If`` on ``MB_DOF < DOFS_PER_EL*ELS_PER_MB`` that holds the
               assignments into ``drst<axis>_global[...]``.
            """
            code = []
            for inl in range(par.inline):
                for axis in dims:
                    code.append(
                        Initializer(POD(float_type, "d%drst%d" % (inl, axis)), 0))

            code.append(Line())

            def get_mat_entry(row, col, axis):
                # Index expression into smem_diff_rst_mat for (row, col, axis).
                return ("smem_diff_rst_mat["
                        "%(row)s*DIFFMAT_COLUMNS + %(axis)s*DOFS_PER_EL"
                        " + %(col)s"
                        "]" % {"row": row, "col": col, "axis": axis})

            def statements_for_dof(j):
                # *j* is formatted with %s, so a number or an expression works.
                stmts = []
                for inl in range(par.inline):
                    stmts.append(Assign(
                        "field_value%d" % inl,
                        "fp_tex1Dfetch(field_tex, GLOBAL_MB_DOF_BASE + %d*ALIGNED_DOFS_PER_MB "
                        "+ mb_el*DOFS_PER_EL + %s)" % (inl, j)))
                stmts.append(Line())
                for axis in dims:
                    for inl in range(par.inline):
                        stmts.append(S(
                            "d%drst%d += %s * field_value%d"
                            % (inl, axis,
                               get_mat_entry("SEGMENT_DOF", j, axis), inl)))
                stmts.append(Line())
                return stmts

            from hedge.backends.cuda.tools import unroll
            code.extend(
                [POD(float_type, "field_value%d" % inl)
                 for inl in range(par.inline)]
                + [Line()]
                + unroll(statements_for_dof,
                         given.dofs_per_el(), self.plan.max_unroll))

            store_code = Block()
            for inl in range(par.inline):
                for rst_axis in dims:
                    store_code.append(Assign(
                        "drst%d_global[GLOBAL_MB_DOF_BASE"
                        " + %d*ALIGNED_DOFS_PER_MB + MB_DOF]" % (rst_axis, inl),
                        "d%drst%d" % (inl, rst_axis),
                    ))

            code.append(If("MB_DOF < DOFS_PER_EL*ELS_PER_MB", store_code))

            return code
예제 #5
0
        def get_matmul_code():
            """Build the ``Block`` holding the matrix multiply code.

            The block first emits ``__syncthreads()``, the load code from
            ``get_load_code()`` guarded by ``GLOBAL_MB_NR < microblock_count``,
            a second ``__syncthreads()``, and the zero-initialized
            ``result<inl>`` accumulators plus the ``mat_entry`` declaration.
            It then appends a guarded block containing the unrolled
            multiply-accumulate statements and the stores to ``out_vector``.
            """
            from hedge.backends.cuda.tools import unroll

            index_check_condition = "GLOBAL_MB_NR < microblock_count"

            def if_(conditions, then):
                # Join the non-empty conditions; with none left, no guard is needed.
                active = [cond for cond in conditions if cond]
                if not active:
                    return then
                return If(" && ".join(active), then)

            def multiply_column(j):
                # One matrix entry fetch, then one accumulation per inlined vector.
                stmts = [
                    Assign("mat_entry", "fp_tex2D(mat_tex, IMAGE_EL_DOF, %s)" % j)
                ]
                stmts.extend(
                    S("result%d += mat_entry "
                      "* smem_in_vector[PAR_MB_NR][%d][mb_el*PREIMAGE_DOFS_PER_EL + %s]"
                      % (inl, inl, j))
                    for inl in range(par.inline))
                return stmts

            prologue = [
                Comment("everybody needs to be done with the old data"),
                S("__syncthreads()"),
                Line(),
                If(index_check_condition, get_load_code()),
                Line(),
                Comment("all the new data must be loaded"),
                S("__syncthreads()"),
                Line(),
            ]
            prologue.extend(
                Initializer(POD(float_type, "result%d" % inl), 0)
                for inl in range(par.inline))
            prologue.extend([Line(), POD(float_type, "mat_entry"), Line()])

            result = Block(prologue)

            guarded = unroll(
                multiply_column, total_number=plan.preimage_dofs_per_el)
            guarded = guarded + [Line()]
            guarded = guarded + [
                Assign(
                    "out_vector[GLOBAL_MB_IMAGE_DOF_BASE + "
                    "%d*ALIGNED_IMAGE_DOFS_PER_MB + IMAGE_MB_DOF]" % inl,
                    "result%d" % inl)
                for inl in range(par.inline)
            ]

            result.append(if_(
                ["IMAGE_MB_DOF < IMAGE_DOFS_PER_MB", index_check_condition],
                Block(guarded)))

            return result
예제 #6
0
        def get_scalar_diff_code():
            """Build the node list for scalar differentiation via ``smem_field``.

            The generated code zero-initializes the ``d<inl>rst<axis>``
            accumulators, emits ``__syncthreads()`` around the code returned
            by ``get_load_code()``, and declares ``field_value<inl>`` (plus
            ``dmat_entries`` when ``float_type`` is ``numpy.float32``).  Under
            the guard ``IMAGE_MB_DOF < IMAGE_DOFS_PER_MB`` it then places the
            unrolled accumulation statements followed by the stores to
            ``drst<axis>_global[...]``.

            Returns:
                The list of generated nodes.

            Raises:
                AssertionError: if ``float_type`` is neither ``numpy.float32``
                    nor ``numpy.float64``.
            """
            from hedge.backends.cuda.tools import unroll

            tex_channels = ["x", "y", "z", "w"]

            code = [
                Initializer(POD(float_type, "d%drst%d" % (inl, axis)), 0)
                for inl in range(par.inline)
                for axis in dims
            ]
            code.append(Line())

            store_code = Block()
            for inl in range(par.inline):
                for rst_axis in dims:
                    store_code.append(
                        Assign(
                            "drst%d_global[GLOBAL_MB_IMAGE_DOF_BASE + "
                            "%d*ALIGNED_IMAGE_DOFS_PER_MB + IMAGE_MB_DOF]"
                            % (rst_axis, inl),
                            "d%drst%d" % (inl, rst_axis),
                        )
                    )

            code.extend(
                [
                    Comment("everybody needs to be done with the old data"),
                    S("__syncthreads()"),
                    Line(),
                    get_load_code(),
                    Line(),
                    Comment("all the new data must be loaded"),
                    S("__syncthreads()"),
                    Line(),
                ]
            )

            if float_type == numpy.float32:
                # Holds all per-axis matrix entries returned by one fetch.
                code.append(Value("float%d" % rst_channels, "dmat_entries"))

            code.extend(
                [POD(float_type, "field_value%d" % inl)
                 for inl in range(par.inline)]
                + [Line()])

            def unroll_body(j):
                """Return the statements generated for preimage DOF index *j*."""
                result = [
                    Assign(
                        "field_value%d" % inl,
                        "smem_field[PAR_MB_NR][%d]"
                        "[mb_el*PREIMAGE_DOFS_PER_EL+%s]" % (inl, j))
                    for inl in range(par.inline)
                ]

                if float_type == numpy.float32:
                    result.append(
                        Assign(
                            "dmat_entries",
                            "tex1Dfetch(diff_rst_mat_tex, "
                            "IMAGE_EL_DOF + %s*IMAGE_DOFS_PER_EL)" % j)
                    )
                    result.extend(
                        S("d%drst%d += dmat_entries.%s * field_value%d"
                          % (inl, axis, tex_channels[axis], inl))
                        for inl in range(par.inline)
                        for axis in dims
                    )
                elif float_type == numpy.float64:
                    # NOTE(review): %(j)d needs a numeric j, unlike the %s
                    # used above -- confirm unroll never passes an expression
                    # string on this path.
                    result.extend(
                        S(
                            "d%(inl)drst%(axis)d += "
                            "fp_tex1Dfetch(diff_rst_mat_tex, %(axis)d "
                            "+ DIMENSIONS*(IMAGE_EL_DOF + %(j)d*IMAGE_DOFS_PER_EL))"
                            "* field_value%(inl)d"
                            % {"inl": inl, "axis": axis, "j": j}
                        )
                        for inl in range(par.inline)
                        for axis in dims
                    )
                else:
                    # Raised explicitly: a bare `assert False` vanishes under
                    # `python -O`, which would silently emit no accumulation.
                    raise AssertionError(
                        "unsupported float_type: %r" % (float_type,))

                return result

            code.append(
                If(
                    "IMAGE_MB_DOF < IMAGE_DOFS_PER_MB",
                    Block(
                        unroll(unroll_body,
                               total_number=plan.preimage_dofs_per_el)
                        + [store_code]),
                )
            )

            return code
예제 #7
0
        def get_scalar_diff_code():
            """Return the nodes that differentiate fields read from ``smem_field``.

            Emission order of the returned list:

            1. ``d<inl>rst<axis>`` accumulators, each initialized to 0.
            2. ``__syncthreads()``, the code from ``get_load_code()``, and a
               second ``__syncthreads()``.
            3. A ``dmat_entries`` declaration (only for ``numpy.float32``) and
               the ``field_value<inl>`` declarations.
            4. An ``If`` on ``IMAGE_MB_DOF < IMAGE_DOFS_PER_MB`` wrapping the
               unrolled accumulation statements and the stores to
               ``drst<axis>_global[...]``.

            Raises ``AssertionError`` when ``float_type`` is neither
            ``numpy.float32`` nor ``numpy.float64``.
            """
            code = []
            for inl in range(par.inline):
                for axis in dims:
                    code.append(
                        Initializer(POD(float_type, "d%drst%d" % (inl, axis)), 0))

            code.append(Line())

            tex_channels = ["x", "y", "z", "w"]

            store_code = Block()
            for inl in range(par.inline):
                for rst_axis in dims:
                    store_code.append(Assign(
                        "drst%d_global[GLOBAL_MB_IMAGE_DOF_BASE + "
                        "%d*ALIGNED_IMAGE_DOFS_PER_MB + IMAGE_MB_DOF]"
                        % (rst_axis, inl),
                        "d%drst%d" % (inl, rst_axis)
                        ))

            from hedge.backends.cuda.tools import unroll
            code.extend([
                Comment("everybody needs to be done with the old data"),
                S("__syncthreads()"),
                Line(),
                get_load_code(),
                Line(),
                Comment("all the new data must be loaded"),
                S("__syncthreads()"),
                Line(),
                ])

            if float_type == numpy.float32:
                code.append(Value("float%d" % rst_channels, "dmat_entries"))

            code.extend([
                POD(float_type, "field_value%d" % inl)
                for inl in range(par.inline)
                ]+[Line()])

            def unroll_body(j):
                # Statements for one preimage DOF index *j*.
                result = [
                    Assign("field_value%d" % inl,
                        "smem_field[PAR_MB_NR][%d][mb_el*PREIMAGE_DOFS_PER_EL+%s]" % (inl, j))
                    for inl in range(par.inline)
                    ]

                if float_type == numpy.float32:
                    # One fetch fills dmat_entries; its channels are then
                    # selected per axis via tex_channels.
                    result.append(Assign("dmat_entries",
                        "tex1Dfetch(diff_rst_mat_tex, IMAGE_EL_DOF + %s*IMAGE_DOFS_PER_EL)" % j))
                    result.extend(
                        S("d%drst%d += dmat_entries.%s * field_value%d"
                            % (inl, axis, tex_channels[axis], inl))
                        for inl in range(par.inline)
                        for axis in dims)
                elif float_type == numpy.float64:
                    # One fp_tex1Dfetch per axis and inlined field.
                    result.extend(
                        S("d%(inl)drst%(axis)d += "
                            "fp_tex1Dfetch(diff_rst_mat_tex, %(axis)d "
                            "+ DIMENSIONS*(IMAGE_EL_DOF + %(j)d*IMAGE_DOFS_PER_EL))"
                            "* field_value%(inl)d" % {
                            "inl": inl,
                            "axis": axis,
                            "j": j
                            })
                        for inl in range(par.inline)
                        for axis in dims)
                else:
                    # An explicit raise survives `python -O`; the previous
                    # `assert False` would have been compiled away.
                    raise AssertionError(
                        "unsupported float_type: %r" % (float_type,))

                return result

            code.append(If("IMAGE_MB_DOF < IMAGE_DOFS_PER_MB", Block(unroll(unroll_body,
                    total_number=plan.preimage_dofs_per_el)
                    +[store_code])))

            return code