Beispiel #1
0
    def get_kernel(self):
        """Assemble the loopy kernel that adds translated box expansions to
        ``qbx_expansions``.

        For every ``icenter`` the kernel text looks up a box number through
        ``qbx_center_to_target_box`` and ``target_boxes``.  When that number
        lies in ``[target_base_ibox, target_base_ibox + nboxes)`` it reads
        the box's coefficients from ``expansions``, runs the instructions
        returned by ``self.get_translation_loopy_insns()`` and adds each
        ``coeff<i>`` to ``qbx_expansions[icenter, i]``.

        :returns: the kernel after ``prepare_loopy_kernel`` of both the
            source and the target expansion has been applied and all
            ``idim*`` inames have been tagged ``unr``.
        """
        n_src_coeffs = len(self.src_expansion)
        n_tgt_coeffs = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[icenter]: 0<=icenter<ncenters}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        loop_head = ["""
                for icenter
                    <> isrc_box = qbx_center_to_target_box[icenter]

                    # The box's expansions which we're translating here
                    # (our source) is, globally speaking, a target box.

                    <> src_ibox = target_boxes[isrc_box] \
                        {id=read_src_ibox}

                    # Is the box number on the level currently under
                    # consideration?
                    <> in_range = (target_base_ibox <= src_ibox
                            and src_ibox < target_base_ibox + nboxes)

                    if in_range
                        <> tgt_center[idim] = qbx_centers[idim, icenter]
                        <> src_center[idim] = centers[idim, src_ibox] {dup=idim}

                        <> tgt_rscale = qbx_expansion_radii[icenter]

                        <> d[idim] = tgt_center[idim] - src_center[idim] {dup=idim}

                        """]

        # One load instruction per source coefficient.
        load_template = """
                        <> src_coeff{i} = \
                                expansions[src_ibox - target_base_ibox, {i}] \
                                {{dep=read_src_ibox}}
                        """
        coeff_loads = [
            load_template.format(i=k) for k in range(n_src_coeffs)]

        translation = self.get_translation_loopy_insns()

        # One accumulation instruction per target coefficient.
        store_template = """
                        qbx_expansions[icenter, {i}] = \
                            qbx_expansions[icenter, {i}] + coeff{i} \
                            {{id_prefix=write_expn}}
                        """
        coeff_stores = [
            store_template.format(i=k) for k in range(n_tgt_coeffs)]

        loop_tail = ["""
                    end
                end
                """]

        instructions = (
            loop_head + coeff_loads + translation + coeff_stores + loop_tail)

        kernel_args = [
            lp.GlobalArg("target_boxes", None, shape=None, offset=lp.auto),
            lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
            lp.ValueArg("src_rscale", None),
            lp.GlobalArg(
                "qbx_centers", None, shape="dim, ncenters", dim_tags="sep,c"),
            lp.GlobalArg("qbx_expansion_radii", None, shape="ncenters"),
            lp.ValueArg("naligned_boxes,target_base_ibox,nboxes", np.int32),
            lp.GlobalArg(
                "expansions", None,
                shape=("nboxes", n_src_coeffs), offset=lp.auto),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ncenters>=1",
            silenced_warnings="write_race(write_expn*)",
            fixed_parameters={"dim": self.dim, "nchildren": 2**self.dim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        return lp.tag_inames(knl, "idim*:unr")
Beispiel #2
0
    def get_kernel(self):
        """Assemble the loopy kernel that translates a parent box's
        expansion into each listed target box.

        For every ``itgt_box`` the kernel text reads
        ``tgt_ibox = target_boxes[itgt_box]`` and
        ``src_ibox = box_parent_ids[tgt_ibox]``, loads the coefficients
        ``src_expansions[src_ibox - src_base_ibox, i]``, runs the
        instructions returned by ``self.get_translation_loopy_insns()`` and
        adds each ``coeff<i>`` to
        ``tgt_expansions[tgt_ibox - tgt_base_ibox, i]``.

        Terminology used in the kernel text: ``isrc_box`` is an index into
        a list of boxes, ``src_ibox`` is a (global) box number; likewise
        for ``itgt_box`` / ``tgt_ibox``.

        :returns: the kernel after ``prepare_loopy_kernel`` of both
            expansions has been applied and all ``idim*`` inames have been
            tagged ``unr``.
        :raises RuntimeError: if ``self.src_expansion`` and
            ``self.tgt_expansion`` are not the same object.
        """
        same_expansion = self.src_expansion is self.tgt_expansion
        if not same_expansion:
            raise RuntimeError(
                "%s requires that the source "
                "and target expansion are the same object"
                % self.default_name)

        n_coeffs = len(self.src_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        loop_head = ["""
                for itgt_box
                    <> tgt_ibox = target_boxes[itgt_box]

                    <> tgt_center[idim] = centers[idim, tgt_ibox] \

                    <> src_ibox = box_parent_ids[tgt_ibox] \
                        {id=read_src_ibox}

                    <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                    <> d[idim] = tgt_center[idim] - src_center[idim] {dup=idim}

                    """]

        # One load instruction per coefficient.
        load_template = """
                    <> src_coeff{i} = \
                        src_expansions[src_ibox - src_base_ibox, {i}] \
                        {{id_prefix=read_expn,dep=read_src_ibox}}
                    """
        coeff_loads = [load_template.format(i=k) for k in range(n_coeffs)]

        translation = self.get_translation_loopy_insns()

        # One accumulation instruction per coefficient.
        store_template = """

                    tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \
                        tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] + coeff{i} \
                        {{id_prefix=write_expn,nosync=read_expn*}}
                    """
        coeff_stores = [store_template.format(i=k) for k in range(n_coeffs)]

        loop_tail = ["""
                end
                """]

        instructions = (
            loop_head + coeff_loads + translation + coeff_stores + loop_tail)

        kernel_args = [
            lp.GlobalArg("target_boxes", None, shape=lp.auto, offset=lp.auto),
            lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
            lp.ValueArg("src_rscale,tgt_rscale", None),
            lp.ValueArg("naligned_boxes,nboxes", np.int32),
            lp.ValueArg("tgt_base_ibox,src_base_ibox", np.int32),
            lp.ValueArg("ntgt_level_boxes,nsrc_level_boxes", np.int32),
            lp.GlobalArg("box_parent_ids", None, shape="nboxes"),
            lp.GlobalArg(
                "tgt_expansions", None,
                shape=("ntgt_level_boxes", n_coeffs), offset=lp.auto),
            lp.GlobalArg(
                "src_expansions", None,
                shape=("nsrc_level_boxes", n_coeffs), offset=lp.auto),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ntgt_boxes>=1",
            silenced_warnings="write_race(write_expn*)",
            fixed_parameters={"dim": self.dim, "nchildren": 2**self.dim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        return lp.tag_inames(knl, "idim*:unr")
Beispiel #3
0
    def get_kernel(self):
        """Assemble the loopy kernel that sums translated source-box
        expansions into ``qbx_expansions``.

        For every ``icenter`` the kernel text reads
        ``qbx_center_to_target_box_source_level[icenter]`` and does nothing
        for that center when the value is ``-1``.  Otherwise it walks
        ``isrc_box`` over the range given by two consecutive entries of
        ``src_box_starts``, takes ``src_ibox = src_box_lists[isrc_box]``,
        loads ``src_expansions[src_ibox - src_base_ibox, i]``, runs the
        instructions returned by ``self.get_translation_loopy_insns()`` and
        adds the sum over ``isrc_box`` of each ``coeff<i>`` to
        ``qbx_expansions[icenter, i]``.

        :returns: the kernel after ``prepare_loopy_kernel`` of both the
            source and the target expansion has been applied and all
            ``idim*`` inames have been tagged ``unr``.
        """
        n_src_coeffs = len(self.src_expansion)
        n_tgt_coeffs = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[icenter]: 0<=icenter<ncenters}",
            "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        loop_head = ["""
                for icenter
                    <> icontaining_tgt_box = \
                        qbx_center_to_target_box_source_level[icenter]

                    if icontaining_tgt_box != -1
                        <> tgt_center[idim] = qbx_centers[idim, icenter] \
                                {id=fetch_tgt_center}
                        <> tgt_rscale = qbx_expansion_radii[icenter]

                        <> isrc_start = src_box_starts[icontaining_tgt_box]
                        <> isrc_stop = src_box_starts[icontaining_tgt_box+1]

                        for isrc_box
                            <> src_ibox = src_box_lists[isrc_box] \
                                    {id=read_src_ibox}
                            <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                            <> d[idim] = tgt_center[idim] - src_center[idim] \
                                    {dup=idim}
                            """]

        # One load instruction per source coefficient.
        load_template = """

                            <> src_coeff{i} = \
                                src_expansions[src_ibox - src_base_ibox, {i}] \
                                {{dep=read_src_ibox}}

                            """
        coeff_loads = [
            load_template.format(i=k) for k in range(n_src_coeffs)]

        translation = self.get_translation_loopy_insns()

        inner_loop_end = ["""

                        end
                        """]

        # One reduce-and-accumulate instruction per target coefficient.
        store_template = """
                        qbx_expansions[icenter, {i}] = \
                                qbx_expansions[icenter, {i}] + \
                                simul_reduce(sum, isrc_box, coeff{i}) \
                                {{id_prefix=write_expn}}
                        """
        coeff_stores = [
            store_template.format(i=k) for k in range(n_tgt_coeffs)]

        loop_tail = ["""
                    end
                end
                """]

        instructions = (
            loop_head + coeff_loads + translation + inner_loop_end
            + coeff_stores + loop_tail)

        kernel_args = [
            lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
            lp.ValueArg("src_rscale", None),
            lp.GlobalArg(
                "src_box_starts, src_box_lists",
                None, shape=None, strides=(1,)),
            lp.GlobalArg(
                "qbx_centers", None, shape="dim, ncenters", dim_tags="sep,c"),
            lp.GlobalArg("qbx_expansion_radii", None, shape="ncenters"),
            lp.ValueArg("aligned_nboxes,nsrc_level_boxes", np.int32),
            lp.ValueArg("src_base_ibox", np.int32),
            lp.GlobalArg(
                "src_expansions", None,
                shape=("nsrc_level_boxes", n_src_coeffs), offset=lp.auto),
            lp.GlobalArg(
                "qbx_expansions", None, shape=("ncenters", n_tgt_coeffs)),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ncenters>=1",
            silenced_warnings="write_race(write_expn*)",
            fixed_parameters={"dim": self.dim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        return lp.tag_inames(knl, "idim*:unr")
Beispiel #4
0
    def get_kernel(self):
        """Assemble the loopy kernel that translates a parent box's
        expansion into each listed target box.

        For every ``itgt_box`` the kernel text reads
        ``tgt_ibox = target_boxes[itgt_box]`` and
        ``src_ibox = box_parent_ids[tgt_ibox]``, loads the coefficients
        ``src_expansions[src_ibox - src_base_ibox, i]``, runs the
        instructions returned by ``self.get_translation_loopy_insns()`` and
        adds each ``coeff<i>`` to
        ``tgt_expansions[tgt_ibox - tgt_base_ibox, i]``.

        Terminology used in the kernel text: ``isrc_box`` is an index into
        a list of boxes, ``src_ibox`` is a (global) box number; likewise
        for ``itgt_box`` / ``tgt_ibox``.

        :returns: the kernel after ``prepare_loopy_kernel`` of both
            expansions has been applied and all ``idim*`` inames have been
            tagged ``unr``.
        :raises RuntimeError: if ``self.src_expansion`` and
            ``self.tgt_expansion`` are not the same object.
        """
        same_expansion = self.src_expansion is self.tgt_expansion
        if not same_expansion:
            raise RuntimeError(
                "%s requires that the source "
                "and target expansion are the same object"
                % self.default_name)

        n_coeffs = len(self.src_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        loop_head = ["""
                for itgt_box
                    <> tgt_ibox = target_boxes[itgt_box]

                    <> tgt_center[idim] = centers[idim, tgt_ibox] \

                    <> src_ibox = box_parent_ids[tgt_ibox] \
                        {id=read_src_ibox}

                    <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                    <> d[idim] = tgt_center[idim] - src_center[idim] {dup=idim}

                    """]

        # One load instruction per coefficient.
        load_template = """
                    <> src_coeff{i} = \
                        src_expansions[src_ibox - src_base_ibox, {i}] \
                        {{id_prefix=read_expn,dep=read_src_ibox}}
                    """
        coeff_loads = [load_template.format(i=k) for k in range(n_coeffs)]

        translation = self.get_translation_loopy_insns()

        # One accumulation instruction per coefficient.
        store_template = """

                    tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \
                        tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] + coeff{i} \
                        {{id_prefix=write_expn,nosync=read_expn*}}
                    """
        coeff_stores = [store_template.format(i=k) for k in range(n_coeffs)]

        loop_tail = ["""
                end
                """]

        instructions = (
            loop_head + coeff_loads + translation + coeff_stores + loop_tail)

        kernel_args = [
            lp.GlobalArg("target_boxes", None, shape=lp.auto, offset=lp.auto),
            lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
            lp.ValueArg("src_rscale,tgt_rscale", None),
            lp.ValueArg("naligned_boxes,nboxes", np.int32),
            lp.ValueArg("tgt_base_ibox,src_base_ibox", np.int32),
            lp.ValueArg("ntgt_level_boxes,nsrc_level_boxes", np.int32),
            lp.GlobalArg("box_parent_ids", None, shape="nboxes"),
            lp.GlobalArg(
                "tgt_expansions", None,
                shape=("ntgt_level_boxes", n_coeffs), offset=lp.auto),
            lp.GlobalArg(
                "src_expansions", None,
                shape=("nsrc_level_boxes", n_coeffs), offset=lp.auto),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ntgt_boxes>=1",
            silenced_warnings="write_race(write_expn*)",
            fixed_parameters={"dim": self.dim, "nchildren": 2**self.dim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        return lp.tag_inames(knl, "idim*:unr")
Beispiel #5
0
    def get_kernel(self):
        """Assemble the loopy kernel that translates the expansions of a
        list of source boxes into each target box.

        For every ``itgt_box`` the kernel text reads
        ``tgt_ibox = target_boxes[itgt_box]`` and walks ``isrc_box`` over
        the range given by ``src_box_starts[itgt_box]`` and
        ``src_box_starts[itgt_box+1]``.  For each
        ``src_ibox = src_box_lists[isrc_box]`` it loads
        ``src_expansions[src_ibox - src_base_ibox, i]`` and runs the
        instructions returned by ``self.get_translation_loopy_insns()``.
        The sum over ``isrc_box`` of each ``coeff<i>`` is then assigned to
        ``tgt_expansions[tgt_ibox - tgt_base_ibox, i]``.

        Terminology used in the kernel text: ``isrc_box`` is an index into
        a list of boxes, ``src_ibox`` is a (global) box number; likewise
        for ``itgt_box`` / ``tgt_ibox``.

        :returns: the kernel after ``prepare_loopy_kernel`` of both the
            source and the target expansion has been applied and the
            ``idim`` inames have been tagged ``unr``.
        """
        n_src_coeffs = len(self.src_expansion)
        n_tgt_coeffs = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
            "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        loop_head = ["""
                for itgt_box
                    <> tgt_ibox = target_boxes[itgt_box]

                    <> tgt_center[idim] = centers[idim, tgt_ibox] \

                    <> isrc_start = src_box_starts[itgt_box]
                    <> isrc_stop = src_box_starts[itgt_box+1]

                    for isrc_box
                        <> src_ibox = src_box_lists[isrc_box] \
                                {id=read_src_ibox}

                        <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                        <> d[idim] = tgt_center[idim] - src_center[idim] \
                            {dup=idim}

                        """]

        # One load instruction per source coefficient.
        load_template = """
                        <> src_coeff{coeffidx} = \
                            src_expansions[src_ibox - src_base_ibox, {coeffidx}] \
                            {{dep=read_src_ibox}}
                        """
        coeff_loads = [
            load_template.format(coeffidx=k) for k in range(n_src_coeffs)]

        translation = self.get_translation_loopy_insns()

        inner_loop_end = ["""
                    end

                    """]

        # One reduce-and-store instruction per target coefficient.
        store_template = """
                    tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \
                            simul_reduce(sum, isrc_box, coeff{coeffidx}) \
                            {{id_prefix=write_expn}}
                    """
        coeff_stores = [
            store_template.format(coeffidx=k) for k in range(n_tgt_coeffs)]

        loop_tail = ["""
                end
                """]

        instructions = (
            loop_head + coeff_loads + translation + inner_loop_end
            + coeff_stores + loop_tail)

        kernel_args = [
            lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
            lp.ValueArg("src_rscale,tgt_rscale", None),
            lp.GlobalArg(
                "src_box_starts, src_box_lists",
                None, shape=None, strides=(1,), offset=lp.auto),
            lp.ValueArg(
                "aligned_nboxes,tgt_base_ibox,src_base_ibox", np.int32),
            lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes", np.int32),
            lp.GlobalArg(
                "src_expansions", None,
                shape=("nsrc_level_boxes", n_src_coeffs), offset=lp.auto),
            lp.GlobalArg(
                "tgt_expansions", None,
                shape=("ntgt_level_boxes", n_tgt_coeffs), offset=lp.auto),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ntgt_boxes>=1",
            silenced_warnings="write_race(write_expn*)",
            default_offset=lp.auto,
            fixed_parameters={"dim": self.dim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        # Both tagging calls of the original are retained, in order.
        knl = lp.tag_inames(knl, "idim*:unr")
        return lp.tag_inames(knl, {"idim": "unr"})
Beispiel #6
0
    def get_kernel(self):
        """Assemble the loopy kernel that adds translated box expansions to
        ``qbx_expansions``.

        For every ``icenter`` the kernel text looks up a box number through
        ``qbx_center_to_target_box`` and ``target_boxes``.  When that number
        lies in ``[target_base_ibox, target_base_ibox + nboxes)`` it reads
        the box's coefficients from ``expansions``, runs the instructions
        returned by ``self.get_translation_loopy_insns()`` and adds each
        ``coeff<i>`` to ``qbx_expansions[icenter, i]``.

        :returns: the kernel after ``prepare_loopy_kernel`` of both the
            source and the target expansion has been applied and all
            ``idim*`` inames have been tagged ``unr``.
        """
        n_src_coeffs = len(self.src_expansion)
        n_tgt_coeffs = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[icenter]: 0<=icenter<ncenters}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        loop_head = ["""
                for icenter
                    <> isrc_box = qbx_center_to_target_box[icenter]

                    # The box's expansions which we're translating here
                    # (our source) is, globally speaking, a target box.

                    <> src_ibox = target_boxes[isrc_box] \
                        {id=read_src_ibox}

                    # Is the box number on the level currently under
                    # consideration?
                    <> in_range = (target_base_ibox <= src_ibox
                            and src_ibox < target_base_ibox + nboxes)

                    if in_range
                        <> tgt_center[idim] = qbx_centers[idim, icenter]
                        <> src_center[idim] = centers[idim, src_ibox] {dup=idim}

                        <> tgt_rscale = qbx_expansion_radii[icenter]

                        <> d[idim] = tgt_center[idim] - src_center[idim] {dup=idim}

                        """]

        # One load instruction per source coefficient.
        load_template = """
                        <> src_coeff{i} = \
                                expansions[src_ibox - target_base_ibox, {i}] \
                                {{dep=read_src_ibox}}
                        """
        coeff_loads = [
            load_template.format(i=k) for k in range(n_src_coeffs)]

        translation = self.get_translation_loopy_insns()

        # One accumulation instruction per target coefficient.
        store_template = """
                        qbx_expansions[icenter, {i}] = \
                            qbx_expansions[icenter, {i}] + coeff{i} \
                            {{id_prefix=write_expn}}
                        """
        coeff_stores = [
            store_template.format(i=k) for k in range(n_tgt_coeffs)]

        loop_tail = ["""
                    end
                end
                """]

        instructions = (
            loop_head + coeff_loads + translation + coeff_stores + loop_tail)

        kernel_args = [
            lp.GlobalArg("target_boxes", None, shape=None, offset=lp.auto),
            lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
            lp.ValueArg("src_rscale", None),
            lp.GlobalArg(
                "qbx_centers", None, shape="dim, ncenters", dim_tags="sep,c"),
            lp.GlobalArg("qbx_expansion_radii", None, shape="ncenters"),
            lp.ValueArg("naligned_boxes,target_base_ibox,nboxes", np.int32),
            lp.GlobalArg(
                "expansions", None,
                shape=("nboxes", n_src_coeffs), offset=lp.auto),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ncenters>=1",
            silenced_warnings="write_race(write_expn*)",
            fixed_parameters={"dim": self.dim, "nchildren": 2**self.dim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        return lp.tag_inames(knl, "idim*:unr")
Beispiel #7
0
    def get_kernel(self):
        """Assemble the loopy kernel that translates the expansions of a
        list of source boxes into each target box.

        For every ``itgt_box`` the kernel text reads
        ``tgt_ibox = target_boxes[itgt_box]`` and walks ``isrc_box`` over
        the range given by ``src_box_starts[itgt_box]`` and
        ``src_box_starts[itgt_box+1]``.  For each
        ``src_ibox = src_box_lists[isrc_box]`` it loads
        ``src_expansions[src_ibox - src_base_ibox, i]`` and runs the
        instructions returned by ``self.get_translation_loopy_insns()``.
        The sum over ``isrc_box`` of each ``coeff<i>`` is then assigned to
        ``tgt_expansions[tgt_ibox - tgt_base_ibox, i]``.

        Terminology used in the kernel text: ``isrc_box`` is an index into
        a list of boxes, ``src_ibox`` is a (global) box number; likewise
        for ``itgt_box`` / ``tgt_ibox``.

        :returns: the kernel after ``prepare_loopy_kernel`` of both the
            source and the target expansion has been applied and the
            ``idim`` inames have been tagged ``unr``.
        """
        n_src_coeffs = len(self.src_expansion)
        n_tgt_coeffs = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
            "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        loop_head = ["""
                for itgt_box
                    <> tgt_ibox = target_boxes[itgt_box]

                    <> tgt_center[idim] = centers[idim, tgt_ibox] \

                    <> isrc_start = src_box_starts[itgt_box]
                    <> isrc_stop = src_box_starts[itgt_box+1]

                    for isrc_box
                        <> src_ibox = src_box_lists[isrc_box] \
                                {id=read_src_ibox}

                        <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                        <> d[idim] = tgt_center[idim] - src_center[idim] \
                            {dup=idim}

                        """]

        # One load instruction per source coefficient.
        load_template = """
                        <> src_coeff{coeffidx} = \
                            src_expansions[src_ibox - src_base_ibox, {coeffidx}] \
                            {{dep=read_src_ibox}}
                        """
        coeff_loads = [
            load_template.format(coeffidx=k) for k in range(n_src_coeffs)]

        translation = self.get_translation_loopy_insns()

        inner_loop_end = ["""
                    end

                    """]

        # One reduce-and-store instruction per target coefficient.
        store_template = """
                    tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \
                            simul_reduce(sum, isrc_box, coeff{coeffidx}) \
                            {{id_prefix=write_expn}}
                    """
        coeff_stores = [
            store_template.format(coeffidx=k) for k in range(n_tgt_coeffs)]

        loop_tail = ["""
                end
                """]

        instructions = (
            loop_head + coeff_loads + translation + inner_loop_end
            + coeff_stores + loop_tail)

        kernel_args = [
            lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
            lp.ValueArg("src_rscale,tgt_rscale", None),
            lp.GlobalArg(
                "src_box_starts, src_box_lists",
                None, shape=None, strides=(1,), offset=lp.auto),
            lp.ValueArg(
                "aligned_nboxes,tgt_base_ibox,src_base_ibox", np.int32),
            lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes", np.int32),
            lp.GlobalArg(
                "src_expansions", None,
                shape=("nsrc_level_boxes", n_src_coeffs), offset=lp.auto),
            lp.GlobalArg(
                "tgt_expansions", None,
                shape=("ntgt_level_boxes", n_tgt_coeffs), offset=lp.auto),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ntgt_boxes>=1",
            silenced_warnings="write_race(write_expn*)",
            default_offset=lp.auto,
            fixed_parameters={"dim": self.dim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        # Both tagging calls of the original are retained, in order.
        knl = lp.tag_inames(knl, "idim*:unr")
        return lp.tag_inames(knl, {"idim": "unr"})
Beispiel #8
0
    def get_kernel(self):
        """Assemble the loopy kernel that sums translated source-box
        expansions into ``qbx_expansions``.

        For every ``icenter`` the kernel text reads
        ``qbx_center_to_target_box_source_level[icenter]`` and does nothing
        for that center when the value is ``-1``.  Otherwise it walks
        ``isrc_box`` over the range given by two consecutive entries of
        ``src_box_starts``, takes ``src_ibox = src_box_lists[isrc_box]``,
        loads ``src_expansions[src_ibox - src_base_ibox, i]``, runs the
        instructions returned by ``self.get_translation_loopy_insns()`` and
        adds the sum over ``isrc_box`` of each ``coeff<i>`` to
        ``qbx_expansions[icenter, i]``.

        :returns: the kernel after ``prepare_loopy_kernel`` of both the
            source and the target expansion has been applied and all
            ``idim*`` inames have been tagged ``unr``.
        """
        n_src_coeffs = len(self.src_expansion)
        n_tgt_coeffs = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[icenter]: 0<=icenter<ncenters}",
            "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        loop_head = ["""
                for icenter
                    <> icontaining_tgt_box = \
                        qbx_center_to_target_box_source_level[icenter]

                    if icontaining_tgt_box != -1
                        <> tgt_center[idim] = qbx_centers[idim, icenter] \
                                {id=fetch_tgt_center}
                        <> tgt_rscale = qbx_expansion_radii[icenter]

                        <> isrc_start = src_box_starts[icontaining_tgt_box]
                        <> isrc_stop = src_box_starts[icontaining_tgt_box+1]

                        for isrc_box
                            <> src_ibox = src_box_lists[isrc_box] \
                                    {id=read_src_ibox}
                            <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                            <> d[idim] = tgt_center[idim] - src_center[idim] \
                                    {dup=idim}
                            """]

        # One load instruction per source coefficient.
        load_template = """

                            <> src_coeff{i} = \
                                src_expansions[src_ibox - src_base_ibox, {i}] \
                                {{dep=read_src_ibox}}

                            """
        coeff_loads = [
            load_template.format(i=k) for k in range(n_src_coeffs)]

        translation = self.get_translation_loopy_insns()

        inner_loop_end = ["""

                        end
                        """]

        # One reduce-and-accumulate instruction per target coefficient.
        store_template = """
                        qbx_expansions[icenter, {i}] = \
                                qbx_expansions[icenter, {i}] + \
                                simul_reduce(sum, isrc_box, coeff{i}) \
                                {{id_prefix=write_expn}}
                        """
        coeff_stores = [
            store_template.format(i=k) for k in range(n_tgt_coeffs)]

        loop_tail = ["""
                    end
                end
                """]

        instructions = (
            loop_head + coeff_loads + translation + inner_loop_end
            + coeff_stores + loop_tail)

        kernel_args = [
            lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
            lp.ValueArg("src_rscale", None),
            lp.GlobalArg(
                "src_box_starts, src_box_lists",
                None, shape=None, strides=(1,)),
            lp.GlobalArg(
                "qbx_centers", None, shape="dim, ncenters", dim_tags="sep,c"),
            lp.GlobalArg("qbx_expansion_radii", None, shape="ncenters"),
            lp.ValueArg("aligned_nboxes,nsrc_level_boxes", np.int32),
            lp.ValueArg("src_base_ibox", np.int32),
            lp.GlobalArg(
                "src_expansions", None,
                shape=("nsrc_level_boxes", n_src_coeffs), offset=lp.auto),
            lp.GlobalArg(
                "qbx_expansions", None, shape=("ncenters", n_tgt_coeffs)),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ncenters>=1",
            silenced_warnings="write_race(write_expn*)",
            fixed_parameters={"dim": self.dim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        return lp.tag_inames(knl, "idim*:unr")
Beispiel #9
0
    def get_kernel(self):
        """Assemble the loopy kernel that adds translated box expansions to
        ``qbx_expansions``.

        The instruction list is whatever
        ``self.get_translation_loopy_insns()`` returns, followed by one
        block of kernel text.  That text looks up ``src_ibox`` through
        ``qbx_center_to_target_box`` and ``target_boxes``, reads
        coefficients from ``expansions[src_ibox, ...]`` and adds
        ``coeff...`` to ``qbx_expansions[icenter, ...]``.  The
        ``${SRC_COEFFIDX}`` and ``${TGT_COEFFIDX}`` placeholders are
        supplied through ``defines`` as lists of index strings (presumably
        expanded by loopy once per listed value -- confirm against the
        loopy version in use).

        :returns: the kernel after ``prepare_loopy_kernel`` of both the
            source and the target expansion has been applied, ``idim`` has
            been duplicated for ``fetch_tgt_center`` and tagged ``unr``.
        """
        n_src_coeffs = len(self.src_expansion)
        n_tgt_coeffs = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[icenter]: 0<=icenter<ncenters}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        instructions = self.get_translation_loopy_insns() + ["""
                    <> isrc_box = qbx_center_to_target_box[icenter]

                    # The box's expansions which we're translating here
                    # (our source) is, globally speaking, a target box.

                    <> src_ibox = target_boxes[isrc_box] \
                        {id=read_src_ibox}

                    <> tgt_center[idim] = qbx_centers[idim, icenter] \
                        {id=fetch_tgt_center}

                    <> src_center[idim] = centers[idim, src_ibox] \
                        {id=fetch_src_center}
                    <> d[idim] = tgt_center[idim] - src_center[idim]

                    <> src_coeff${SRC_COEFFIDX} = \
                        expansions[src_ibox, ${SRC_COEFFIDX}] \
                        {dep=read_src_ibox}
                    qbx_expansions[icenter, ${TGT_COEFFIDX}] = \
                        qbx_expansions[icenter, ${TGT_COEFFIDX}] \
                        + coeff${TGT_COEFFIDX} \
                        {id_prefix=write_expn}
                    """]

        kernel_args = [
            lp.GlobalArg("target_boxes", None, shape=None, offset=lp.auto),
            lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
            lp.GlobalArg(
                "qbx_centers", None, shape="dim, ncenters", dim_tags="sep,c"),
            lp.ValueArg("naligned_boxes,nboxes", np.int32),
            lp.GlobalArg(
                "expansions", None, shape=("nboxes", n_src_coeffs)),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ncenters>=1",
            defines={
                "dim": self.dim,
                "nchildren": 2**self.dim,
                "SRC_COEFFIDX": list(map(str, range(n_src_coeffs))),
                "TGT_COEFFIDX": list(map(str, range(n_tgt_coeffs))),
            },
            silenced_warnings="write_race(write_expn*)")

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        knl = lp.duplicate_inames(
            knl, "idim", "fetch_tgt_center", tags={"idim": "unr"})
        return lp.tag_inames(knl, {"idim": "unr"})
Beispiel #10
0
    def get_kernel(self):
        """Assemble the loopy kernel that sums translated source-box
        expansions into ``qbx_expansions``.

        The instruction list is whatever
        ``self.get_translation_loopy_insns()`` returns, followed by one
        block of kernel text.  That text reads
        ``qbx_center_to_target_box[icenter]``, takes the ``isrc_box`` range
        from two consecutive entries of ``src_box_starts``, reads
        coefficients from ``src_expansions[src_ibox, ...]`` with
        ``src_ibox = src_box_lists[isrc_box]`` and assigns
        ``sum(isrc_box, coeff...)`` to ``qbx_expansions[icenter, ...]``.
        The ``${SRC_COEFFIDX}`` and ``${TGT_COEFFIDX}`` placeholders are
        supplied through ``defines`` as lists of index strings (presumably
        expanded by loopy once per listed value -- confirm against the
        loopy version in use).

        :returns: the kernel after ``prepare_loopy_kernel`` of both the
            source and the target expansion has been applied, ``idim`` has
            been duplicated for ``fetch_tgt_center`` and tagged ``unr``.
        """
        n_src_coeffs = len(self.src_expansion)
        n_tgt_coeffs = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments

        domains = [
            "{[icenter]: 0<=icenter<ncenters}",
            "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}",
            "{[idim]: 0<=idim<dim}",
        ]

        # The kernel text below is input for loopy, not Python; it is kept
        # character for character.
        instructions = self.get_translation_loopy_insns() + ["""
                    <> icontaining_tgt_box = qbx_center_to_target_box[icenter]

                    <> tgt_center[idim] = qbx_centers[idim, icenter] \
                            {id=fetch_tgt_center}

                    <> isrc_start = src_box_starts[icontaining_tgt_box]
                    <> isrc_stop = src_box_starts[icontaining_tgt_box+1]

                    <> src_ibox = src_box_lists[isrc_box] \
                            {id=read_src_ibox}
                    <> src_center[idim] = centers[idim, src_ibox] \
                            {id=fetch_src_center}
                    <> d[idim] = tgt_center[idim] - src_center[idim]
                    <> src_coeff${SRC_COEFFIDX} = \
                        src_expansions[src_ibox, ${SRC_COEFFIDX}] \
                        {dep=read_src_ibox}

                    qbx_expansions[icenter, ${TGT_COEFFIDX}] = \
                            sum(isrc_box, coeff${TGT_COEFFIDX}) \
                            {id_prefix=write_expn}
                    """]

        kernel_args = [
            lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
            lp.GlobalArg(
                "src_box_starts, src_box_lists",
                None, shape=None, strides=(1,)),
            lp.GlobalArg(
                "qbx_centers", None, shape="dim, ncenters", dim_tags="sep,c"),
            lp.ValueArg("aligned_nboxes,nboxes", np.int32),
            lp.GlobalArg(
                "src_expansions", None, shape=("nboxes", n_src_coeffs)),
            lp.GlobalArg(
                "qbx_expansions", None, shape=("ncenters", n_tgt_coeffs)),
            "...",
        ]
        kernel_args = kernel_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ncenters>=1",
            defines={
                "dim": self.dim,
                "SRC_COEFFIDX": list(map(str, range(n_src_coeffs))),
                "TGT_COEFFIDX": list(map(str, range(n_tgt_coeffs))),
            },
            silenced_warnings="write_race(write_expn*)")

        # Let each expansion post-process the kernel, source first.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        knl = lp.duplicate_inames(
            knl, "idim", "fetch_tgt_center", tags={"idim": "unr"})
        return lp.tag_inames(knl, {"idim": "unr"})
Beispiel #11
0
    def get_kernel(self):
        """Build the loopy kernel that accumulates child-box expansions into
        the expansions of the boxes listed in ``target_boxes``.

        For every ``itgt_box`` the kernel text iterates over ``nchildren``
        entries of ``box_child_ids``; entries equal to 0 are skipped through
        the ``is_src_box_valid`` predicate. For the remaining ones the source
        coefficients are read from ``src_expansions`` and ``coeff<k>`` is
        added onto ``tgt_expansions[tgt_ibox - tgt_base_ibox, k]``. The
        ``coeff<k>`` values are presumably assigned by the instructions from
        ``self.get_translation_loopy_insns()`` (not visible here).

        :returns: the kernel after both expansions' ``prepare_loopy_kernel``
            hooks were applied and inames matching ``idim*`` were tagged
            ``unr``.
        :raises RuntimeError: if ``self.src_expansion`` and
            ``self.tgt_expansion`` are not the same object.
        """
        if self.src_expansion is not self.tgt_expansion:
            raise RuntimeError(
                "%s requires that the source "
                "and target expansion are the same object"
                % type(self).__name__)

        ncoeffs = len(self.src_expansion)

        # Terminology used in the kernel text:
        #
        # isrc_box -> position within a list of (here: source) boxes
        # src_ibox -> the (global) box number of that (here: source) box
        #
        # (itgt_box / tgt_ibox follow the same convention)

        # Every translation instruction additionally gets the
        # ``is_src_box_valid`` predicate and a fresh ``compute_coeff`` id, so
        # the write instructions can depend on ``compute_coeff*``.
        validity_predicate = frozenset(["is_src_box_valid"])
        guarded_insns = [
            insn.copy(predicates=insn.predicates | validity_predicate,
                      id=lp.UniqueName("compute_coeff"))
            for insn in self.get_translation_loopy_insns()
        ]

        from sumpy.tools import gather_loopy_arguments

        loop_prologue = """
                for itgt_box
                    <> tgt_ibox = target_boxes[itgt_box]

                    <> tgt_center[idim] = centers[idim, tgt_ibox] \

                    for isrc_box
                        <> src_ibox = box_child_ids[isrc_box,tgt_ibox] \
                                {id=read_src_ibox}
                        <> is_src_box_valid = src_ibox != 0

                        if is_src_box_valid
                            <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                            <> d[idim] = tgt_center[idim] - src_center[idim] \
                                    {dup=idim}

                            """

        read_coeff_template = """
                            <> src_coeff{i} = \
                                src_expansions[src_ibox - src_base_ibox, {i}] \
                                {{id_prefix=read_coeff,dep=read_src_ibox}}
                            """

        write_expn_template = """
                            tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \
                                tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] \
                                + coeff{i} \
                                {{id_prefix=write_expn,dep=compute_coeff*,
                                    nosync=read_coeff*}}
                            """

        loop_epilogue = """
                        end
                    end
                end
                """

        domains = [
            "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
            "{[isrc_box]: 0<=isrc_box<nchildren}",
            "{[idim]: 0<=idim<dim}",
        ]

        # Order matters: open loops, read coefficients, translate, write
        # results, close loops.
        instructions = (
            [loop_prologue]
            + [read_coeff_template.format(i=icoeff) for icoeff in range(ncoeffs)]
            + guarded_insns
            + [write_expn_template.format(i=icoeff) for icoeff in range(ncoeffs)]
            + [loop_epilogue]
        )

        explicit_args = [
            lp.GlobalArg("target_boxes", None, shape=lp.auto, offset=lp.auto),
            lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
            lp.ValueArg("src_rscale,tgt_rscale", None),
            lp.GlobalArg("box_child_ids", None,
                         shape="nchildren, aligned_nboxes"),
            lp.GlobalArg("tgt_expansions", None,
                         shape=("ntgt_level_boxes", ncoeffs),
                         offset=lp.auto),
            lp.GlobalArg("src_expansions", None,
                         shape=("nsrc_level_boxes", ncoeffs),
                         offset=lp.auto),
            lp.ValueArg("src_base_ibox,tgt_base_ibox", np.int32),
            lp.ValueArg("ntgt_level_boxes,nsrc_level_boxes", np.int32),
            lp.ValueArg("aligned_nboxes", np.int32),
            "...",
        ]
        # Arguments contributed by the expansions are appended last.
        kernel_args = explicit_args + gather_loopy_arguments(
            [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
            domains,
            instructions,
            kernel_args,
            name=self.name,
            assumptions="ntgt_boxes>=1",
            silenced_warnings="write_race(write_expn*)",
            fixed_parameters={"dim": self.dim, "nchildren": 2**self.dim})

        # Give each expansion the chance to adjust the kernel.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        knl = lp.tag_inames(knl, "idim*:unr")

        return knl
Beispiel #12
0
    def get_kernel(self):
        """Build the loopy kernel that accumulates child-box expansions into
        the expansions of the boxes listed in ``target_boxes``.

        For every ``itgt_box`` the kernel text iterates over ``nchildren``
        entries of ``box_child_ids``; entries equal to 0 are skipped through
        the ``is_src_box_valid`` predicate. For the remaining ones the source
        coefficients are read from ``src_expansions`` and ``coeff<k>`` is
        added onto ``tgt_expansions[tgt_ibox - tgt_base_ibox, k]``. The
        ``coeff<k>`` values are presumably assigned by the instructions from
        ``self.get_translation_loopy_insns()`` (not visible here).

        :returns: the kernel with ``dim`` and ``nchildren`` fixed, both
            expansions' ``prepare_loopy_kernel`` hooks applied, and inames
            matching ``idim*`` tagged ``unr``.
        :raises RuntimeError: if ``self.src_expansion`` and
            ``self.tgt_expansion`` are not the same object.
        """
        if self.src_expansion is not self.tgt_expansion:
            owner_name = type(self).__name__
            raise RuntimeError(
                    "%s requires that the source "
                    "and target expansion are the same object" % owner_name)

        ncoeffs = len(self.src_expansion)

        # Terminology used in the kernel text:
        #
        # isrc_box -> position within a list of (here: source) boxes
        # src_ibox -> the (global) box number of that (here: source) box
        #
        # (itgt_box / tgt_ibox follow the same convention)

        # Copy each translation instruction, adding the ``is_src_box_valid``
        # predicate and a fresh ``compute_coeff`` id that the write
        # instructions can depend on via ``compute_coeff*``.
        guarded_insns = []
        for insn in self.get_translation_loopy_insns():
            guarded_insns.append(
                    insn.copy(
                        predicates=(
                            insn.predicates | frozenset(["is_src_box_valid"])),
                        id=lp.UniqueName("compute_coeff")))

        from sumpy.tools import gather_loopy_arguments

        domains = [
                "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
                "{[isrc_box]: 0<=isrc_box<nchildren}",
                "{[idim]: 0<=idim<dim}",
                ]

        # Assemble the instruction list in execution order: open loops, read
        # coefficients, translate, write results, close loops.
        instructions = ["""
                for itgt_box
                    <> tgt_ibox = target_boxes[itgt_box]

                    <> tgt_center[idim] = centers[idim, tgt_ibox] \

                    for isrc_box
                        <> src_ibox = box_child_ids[isrc_box,tgt_ibox] \
                                {id=read_src_ibox}
                        <> is_src_box_valid = src_ibox != 0

                        if is_src_box_valid
                            <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                            <> d[idim] = tgt_center[idim] - src_center[idim] \
                                    {dup=idim}

                            """]

        read_coeff_template = """
                            <> src_coeff{i} = \
                                src_expansions[src_ibox - src_base_ibox, {i}] \
                                {{id_prefix=read_coeff,dep=read_src_ibox}}
                            """
        instructions.extend(
                read_coeff_template.format(i=icoeff)
                for icoeff in range(ncoeffs))

        instructions.extend(guarded_insns)

        write_expn_template = """
                            tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \
                                tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] \
                                + coeff{i} \
                                {{id_prefix=write_expn,dep=compute_coeff*,
                                    nosync=read_coeff*}}
                            """
        instructions.extend(
                write_expn_template.format(i=icoeff)
                for icoeff in range(ncoeffs))

        instructions.append("""
                        end
                    end
                end
                """)

        explicit_args = [
                lp.GlobalArg("target_boxes", None, shape=lp.auto,
                    offset=lp.auto),
                lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
                lp.GlobalArg("box_child_ids", None,
                    shape="nchildren, aligned_nboxes"),
                lp.GlobalArg("tgt_expansions", None,
                    shape=("ntgt_level_boxes", ncoeffs), offset=lp.auto),
                lp.GlobalArg("src_expansions", None,
                    shape=("nsrc_level_boxes", ncoeffs), offset=lp.auto),
                lp.ValueArg("src_base_ibox,tgt_base_ibox", np.int32),
                lp.ValueArg("ntgt_level_boxes,nsrc_level_boxes", np.int32),
                lp.ValueArg("aligned_nboxes", np.int32),
                "..."
                ]
        # Arguments contributed by the expansions are appended last.
        kernel_args = explicit_args + gather_loopy_arguments(
                [self.src_expansion, self.tgt_expansion])

        knl = lp.make_kernel(
                domains,
                instructions,
                kernel_args,
                name=self.name,
                assumptions="ntgt_boxes>=1",
                silenced_warnings="write_race(write_expn*)")

        # ``dim`` and ``nchildren`` are known when the kernel is built, so
        # they are fixed rather than left as kernel parameters.
        knl = lp.fix_parameters(knl,
                dim=self.dim,
                nchildren=2**self.dim)

        # Give each expansion the chance to adjust the kernel.
        for expansion in (self.src_expansion, self.tgt_expansion):
            knl = expansion.prepare_loopy_kernel(knl)

        knl = lp.tag_inames(knl, "idim*:unr")

        return knl