Ejemplo n.º 1
0
def test_rename_argument_of_domain_params(ctx_factory):
    knl = lp.make_kernel("{[i, j]: 0<=i<n and 0<=j<m}", """
            y[i, j] = 2.0f
            """)

    knl = lp.rename_argument(knl, "n", "N")
    knl = lp.rename_argument(knl, "m", "M")

    # renamed variables should not appear in the code
    code_str = lp.generate_code_v2(knl).device_code()
    assert code_str.find("int const n") == -1
    assert code_str.find("int const m") == -1
    assert code_str.find("int const N") != -1
    assert code_str.find("int const M") != -1

    lp.auto_test_vs_ref(knl, ctx_factory(), knl, parameters={"M": 10, "N": 4})
Ejemplo n.º 2
0
def test_rename_argument(ctx_factory):
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    kernel = lp.make_kernel('''{ [i]: 0<=i<n }''', '''out[i] = a + 2''')

    kernel = lp.rename_argument(kernel, "a", "b")

    evt, (out, ) = kernel(queue, b=np.float32(12), n=20)

    assert (np.abs(out.get() - 14) < 1e-8).all()
Ejemplo n.º 3
0
def test_rename_argument(ctx_factory):
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    kernel = lp.make_kernel(
         '''{ [i]: 0<=i<n }''',
         '''out[i] = a + 2''')

    kernel = lp.rename_argument(kernel, "a", "b")

    evt, (out,) = kernel(queue, b=np.float32(12), n=20)

    assert (np.abs(out.get() - 14) < 1e-8).all()
Ejemplo n.º 4
0
def test_rename_argument_with_assumptions():
    import islpy as isl
    knl = lp.make_kernel("{[i]: 0<=i<n_old}", """
            y[i] = 2.0f
            """)
    knl = lp.assume(knl, "n_old=10")

    knl = lp.rename_argument(knl, "n_old", "n_new")
    assumptions = knl["loopy_kernel"].assumptions

    assert "n_old" not in assumptions.get_var_dict()
    assert "n_new" in assumptions.get_var_dict()
    assert ((assumptions
             & isl.BasicSet("[n_new]->{: n_new=10}")) == assumptions)
Ejemplo n.º 5
0
def test_rename_argument_with_auto_stride(ctx_factory):
    from loopy.kernel.array import FixedStrideArrayDimTag

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
            "{[i]: 0<=i<10}",
            """
            y[i] = x[i]
            """, [lp.GlobalArg("x", dtype=float,
                               shape=lp.auto,
                               dim_tags=[FixedStrideArrayDimTag(lp.auto)]), ...])

    knl = lp.rename_argument(knl, "x", "x_new")

    code_str = lp.generate_code_v2(knl).device_code()
    assert code_str.find("double const *__restrict__ x_new,") != -1
    assert code_str.find("double const *__restrict__ x,") == -1

    evt, (out, ) = knl(queue, x_new=np.random.rand(10))
Ejemplo n.º 6
0
    def __call__(self, ary):
        from meshmode.dof_array import DOFArray
        if not isinstance(ary, DOFArray):
            raise TypeError("non-array passed to discretization connection")

        actx = ary.array_context

        @memoize_in(actx, (
            DirectDiscretizationConnection,
            "resample_by_mat_knl",
            self.is_surjective,
        ))
        def mat_knl():
            if self.is_surjective:
                domains = [
                    """
                        {[iel, idof, j]:
                        0<=iel<nelements and
                        0<=idof<n_to_nodes and
                        0<=j<n_from_nodes}
                        """,
                ]

                instructions = """
                result[to_element_indices[iel], idof] \
                        = sum(j, resample_mat[idof, j] \
                        * ary[from_element_indices[iel], j])
                        """
            else:
                domains = [
                    """
                        {[iel_init, idof_init]:
                        0<=iel_init<nelements_result and
                        0<=idof_init<n_to_nodes}
                        """,
                    """
                        {[iel, idof, j]:
                        0<=iel<nelements and
                        0<=idof<n_to_nodes and
                        0<=j<n_from_nodes}
                        """,
                ]

                instructions = """
                result[iel_init, idof_init] = 0 {id=init}
                ... gbarrier {id=barrier, dep=init}
                result[to_element_indices[iel], idof] \
                        = sum(j, resample_mat[idof, j] \
                        * ary[from_element_indices[iel], j]) {dep=barrier}
                        """
            knl = make_loopy_program(
                domains,
                instructions, [
                    lp.GlobalArg("result",
                                 None,
                                 shape="nelements_result, n_to_nodes",
                                 offset=lp.auto),
                    lp.GlobalArg("ary",
                                 None,
                                 shape="nelements_vec, n_from_nodes",
                                 offset=lp.auto),
                    lp.ValueArg("nelements_result", np.int32),
                    lp.ValueArg("nelements_vec", np.int32),
                    lp.ValueArg("n_from_nodes", np.int32),
                    "...",
                ],
                name="resample_by_mat")

            return knl

        @memoize_in(actx, (DirectDiscretizationConnection,
                           "resample_by_picking_knl", self.is_surjective))
        def pick_knl():
            if self.is_surjective:
                domains = [
                    """{[iel, idof]:
                        0<=iel<nelements and
                        0<=idof<n_to_nodes}"""
                ]
                instructions = """
                result[to_element_indices[iel], idof] \
                    = ary[from_element_indices[iel], pick_list[idof]]
                """
            else:
                domains = [
                    """
                        {[iel_init, idof_init]:
                        0<=iel_init<nelements_result and
                        0<=idof_init<n_to_nodes}
                        """, """
                        {[iel, idof]:
                        0<=iel<nelements and
                        0<=idof<n_to_nodes}
                        """
                ]
                instructions = """
                result[iel_init, idof_init] = 0 {id=init}
                ... gbarrier {id=barrier, dep=init}
                result[to_element_indices[iel], idof] \
                    = ary[from_element_indices[iel], pick_list[idof]] {dep=barrier}
                """
            knl = make_loopy_program(
                domains,
                instructions, [
                    lp.GlobalArg("result",
                                 None,
                                 shape="nelements_result, n_to_nodes",
                                 offset=lp.auto),
                    lp.GlobalArg("ary",
                                 None,
                                 shape="nelements_vec, n_from_nodes",
                                 offset=lp.auto),
                    lp.ValueArg("nelements_result", np.int32),
                    lp.ValueArg("nelements_vec", np.int32),
                    lp.ValueArg("n_from_nodes", np.int32),
                    "...",
                ],
                name="resample_by_picking")

            return knl

        if ary.shape != (len(self.from_discr.groups), ):
            raise ValueError("invalid shape of incoming resampling data")

        group_idx_to_result = []

        for i_tgrp, (tgrp,
                     cgrp) in enumerate(zip(self.to_discr.groups,
                                            self.groups)):

            kernels = []  # get kernels for each batch; to be fused eventually
            kwargs = {}  # kwargs to the fused kernel
            for i_batch, batch in enumerate(cgrp.batches):
                if batch.from_element_indices.size == 0:
                    continue

                point_pick_indices = self._resample_point_pick_indices(
                    actx, i_tgrp, i_batch)

                if point_pick_indices is None:
                    knl = mat_knl()
                    knl = lp.rename_argument(knl, "resample_mat",
                                             f"resample_mat_{i_batch}")
                    kwargs[f"resample_mat_{i_batch}"] = (self._resample_matrix(
                        actx, i_tgrp, i_batch))
                else:
                    knl = pick_knl()
                    knl = lp.rename_argument(knl, "pick_list",
                                             f"pick_list_{i_batch}")
                    kwargs[f"pick_list_{i_batch}"] = point_pick_indices

                # {{{ enforce different namespaces for the kernels

                for iname in knl.all_inames():
                    knl = lp.rename_iname(knl, iname, f"{iname}_{i_batch}")

                knl = lp.rename_argument(knl, "ary", f"ary_{i_batch}")
                knl = lp.rename_argument(knl, "from_element_indices",
                                         f"from_element_indices_{i_batch}")
                knl = lp.rename_argument(knl, "to_element_indices",
                                         f"to_element_indices_{i_batch}")
                knl = lp.rename_argument(knl, "nelements",
                                         f"nelements_{i_batch}")

                # }}}

                kwargs[f"ary_{i_batch}"] = ary[batch.from_group_index]
                kwargs[f"from_element_indices_{i_batch}"] = (
                    batch.from_element_indices)
                kwargs[f"to_element_indices_{i_batch}"] = (
                    batch.to_element_indices)

                kernels.append(knl)

            fused_knl = lp.fuse_kernels(kernels)
            # order of operations doesn't matter
            fused_knl = lp.add_nosync(fused_knl,
                                      "global",
                                      "writes:result",
                                      "writes:result",
                                      bidirectional=True,
                                      force=True)

            result_dict = actx.call_loopy(fused_knl,
                                          nelements_result=tgrp.nelements,
                                          n_to_nodes=tgrp.nunit_dofs,
                                          **kwargs)

            group_idx_to_result.append(result_dict["result"])

        from meshmode.dof_array import DOFArray
        return DOFArray.from_list(actx, group_idx_to_result)
Ejemplo n.º 7
0
def test_write_block_matrix_fusion(ctx_factory):
    """
    A slightly more complicated fusion test, where all
    sub-kernels write into the same global matrix, but
    in well-defined separate blocks. This tests makes sure
    data flow specification is preserved during fusion for
    matrix-assembly-like programs.
    """

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    def init_global_mat_prg():
        return lp.make_kernel(
            ["{[idof]: 0 <= idof < n}", "{[jdof]: 0 <= jdof < m}"],
            """
                result[idof, jdof]  = 0 {id=init}
            """,
            [
                lp.GlobalArg("result", None, shape="n, m", offset=lp.auto),
                lp.ValueArg("n, m", np.int32),
                "...",
            ],
            options=lp.Options(return_dict=True),
            default_offset=lp.auto,
            name="init_a_global_matrix",
        )

    def write_into_mat_prg():
        return lp.make_kernel(
            ["{[idof]: 0 <= idof < ndofs}", "{[jdof]: 0 <= jdof < mdofs}"],
            """
                result[offset_i + idof, offset_j + jdof] = mat[idof, jdof]
            """,
            [
                lp.GlobalArg("result", None, shape="n, m", offset=lp.auto),
                lp.ValueArg("n, m", np.int32),
                lp.GlobalArg("mat", None, shape="ndofs, mdofs",
                             offset=lp.auto),
                lp.ValueArg("offset_i", np.int32),
                lp.ValueArg("offset_j", np.int32),
                "...",
            ],
            options=lp.Options(return_dict=True),
            default_offset=lp.auto,
            name="write_into_global_matrix",
        )

    # Construct a 2x2 diagonal matrix with
    # random 5x5 blocks on the block-diagonal,
    # and zeros elsewhere
    n = 10
    block_n = 5
    mat1 = np.random.randn(block_n, block_n)
    mat2 = np.random.randn(block_n, block_n)
    answer = np.block([[mat1, np.zeros((block_n, block_n))],
                       [np.zeros((block_n, block_n)), mat2]])
    kwargs = {"n": n, "m": n}

    # Do some renaming of individual programs before fusion
    kernels = [init_global_mat_prg()]
    for idx, (offset, mat) in enumerate([(0, mat1), (block_n, mat2)]):
        knl = lp.rename_argument(write_into_mat_prg(), "mat", f"mat_{idx}")
        kwargs[f"mat_{idx}"] = mat

        for iname in knl.default_entrypoint.all_inames():
            knl = lp.rename_iname(knl, iname, f"{iname}_{idx}")

        knl = lp.rename_argument(knl, "ndofs", f"ndofs_{idx}")
        knl = lp.rename_argument(knl, "mdofs", f"mdofs_{idx}")
        kwargs[f"ndofs_{idx}"] = block_n
        kwargs[f"mdofs_{idx}"] = block_n

        knl = lp.rename_argument(knl, "offset_i", f"offset_i_{idx}")
        knl = lp.rename_argument(knl, "offset_j", f"offset_j_{idx}")
        kwargs[f"offset_i_{idx}"] = offset
        kwargs[f"offset_j_{idx}"] = offset

        kernels.append(knl)

    fused_knl = lp.fuse_kernels(
        kernels,
        data_flow=[("result", 0, 1), ("result", 1, 2)],
    )
    fused_knl = lp.add_nosync(fused_knl,
                              "global",
                              "writes:result",
                              "writes:result",
                              bidirectional=True,
                              force=True)
    evt, result = fused_knl(queue, **kwargs)
    result = result["result"]
    np.testing.assert_allclose(result, answer)