예제 #1
0
def simple_mpi_communication_entrypoint():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue, force_device_scalars=True)

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    from meshmode.mesh import BTAG_ALL

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-1,)*2,
                                          b=(1,)*2,
                                          nelements_per_axis=(2,)*2)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    dcoll = DiscretizationCollection(actx, local_mesh, order=5,
            mpi_communicator=comm)

    x = thaw(dcoll.nodes(), actx)
    myfunc = actx.np.sin(np.dot(x, [2, 3]))

    from grudge.dof_desc import as_dofdesc

    dd_int = as_dofdesc("int_faces")
    dd_vol = as_dofdesc("vol")
    dd_af = as_dofdesc("all_faces")

    all_faces_func = op.project(dcoll, dd_vol, dd_af, myfunc)
    int_faces_func = op.project(dcoll, dd_vol, dd_int, myfunc)
    bdry_faces_func = op.project(dcoll, BTAG_ALL, dd_af,
                                 op.project(dcoll, dd_vol, BTAG_ALL, myfunc))

    hopefully_zero = (
        op.project(
            dcoll, "int_faces", "all_faces",
            dcoll.opposite_face_connection()(int_faces_func)
        )
        + sum(op.project(dcoll, tpair.dd, "all_faces", tpair.int)
              for tpair in op.cross_rank_trace_pairs(dcoll, myfunc))
    ) - (all_faces_func - bdry_faces_func)

    error = actx.to_numpy(flat_norm(hopefully_zero, ord=np.inf))

    print(__file__)
    with np.printoptions(threshold=100000000, suppress=True):
        logger.debug(hopefully_zero)
    logger.info("error: %.5e", error)

    assert error < 1e-14
예제 #2
0
파일: cavities.py 프로젝트: sll2/grudge
def main(ctx_factory, dim=3, order=4, visualize=False):
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
        force_device_scalars=True,
    )

    from meshmode.mesh.generation import generate_regular_rect_mesh
    mesh = generate_regular_rect_mesh(a=(0.0, ) * dim,
                                      b=(1.0, ) * dim,
                                      nelements_per_axis=(4, ) * dim)

    dcoll = DiscretizationCollection(actx, mesh, order=order)

    if 0:
        epsilon0 = 8.8541878176e-12  # C**2 / (N m**2)
        mu0 = 4 * np.pi * 1e-7  # N/A**2.
        epsilon = 1 * epsilon0
        mu = 1 * mu0
    else:
        epsilon = 1
        mu = 1

    from grudge.models.em import MaxwellOperator

    maxwell_operator = MaxwellOperator(dcoll,
                                       epsilon,
                                       mu,
                                       flux_type=0.5,
                                       dimensions=dim)

    def cavity_mode(x, t=0):
        if dim == 3:
            return get_rectangular_cavity_mode(actx, x, t, 1, (1, 2, 2))
        else:
            return get_rectangular_cavity_mode(actx, x, t, 1, (2, 3))

    fields = cavity_mode(thaw(dcoll.nodes(), actx), t=0)

    maxwell_operator.check_bc_coverage(mesh)

    def rhs(t, w):
        return maxwell_operator.operator(t, w)

    dt = maxwell_operator.estimate_rk4_timestep(actx, dcoll, fields=fields)

    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    target_steps = 60
    final_t = dt * target_steps
    nsteps = int(final_t / dt) + 1

    logger.info("dt = %g nsteps = %d", dt, nsteps)

    from grudge.shortcuts import make_visualizer
    vis = make_visualizer(dcoll)

    step = 0

    def norm(u):
        return op.norm(dcoll, u, 2)

    e, h = maxwell_operator.split_eh(fields)

    if visualize:
        vis.write_vtk_file(f"fld-cavities-{step:04d}.vtu", [
            ("e", e),
            ("h", h),
        ])

    for event in dt_stepper.run(t_end=final_t):
        if isinstance(event, dt_stepper.StateComputed):
            assert event.component_id == "w"

            step += 1
            e, h = maxwell_operator.split_eh(event.state_component)

            norm_e0 = actx.to_numpy(norm(u=e[0]))
            norm_e1 = actx.to_numpy(norm(u=e[1]))
            norm_h0 = actx.to_numpy(norm(u=h[0]))
            norm_h1 = actx.to_numpy(norm(u=h[1]))

            logger.info(
                "[%04d] t = %.5f |e0| = %.5e, |e1| = %.5e, |h0| = %.5e, |h1| = %.5e",
                step, event.t, norm_e0, norm_e1, norm_h0, norm_h1)

            if step % 10 == 0:
                if visualize:
                    vis.write_vtk_file(f"fld-cavities-{step:04d}.vtu", [
                        ("e", e),
                        ("h", h),
                    ])

            # NOTE: These are here to ensure the solution is bounded for the
            # time interval specified
            assert norm_e0 < 0.5
            assert norm_e1 < 0.5
            assert norm_h0 < 0.5
            assert norm_h1 < 0.5
예제 #3
0
def mpi_communication_entrypoint():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue, force_device_scalars=True)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    i_local_rank = comm.Get_rank()
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    dim = 2
    order = 4

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(a=(-0.5,)*dim,
                                          b=(0.5,)*dim,
                                          nelements_per_axis=(16,)*dim)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)

        del mesh
    else:
        local_mesh = mesh_dist.receive_mesh_part()

    dcoll = DiscretizationCollection(actx, local_mesh, order=order,
                                     mpi_communicator=comm)

    def source_f(actx, dcoll, t=0):
        source_center = np.array([0.1, 0.22, 0.33])[:dcoll.dim]
        source_width = 0.05
        source_omega = 3
        nodes = thaw(dcoll.nodes(), actx)
        source_center_dist = flat_obj_array(
            [nodes[i] - source_center[i] for i in range(dcoll.dim)]
        )
        return (
            np.sin(source_omega*t)
            * actx.np.exp(
                -np.dot(source_center_dist, source_center_dist)
                / source_width**2
            )
        )

    from grudge.models.wave import WeakWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE

    wave_op = WeakWaveOperator(
        dcoll,
        0.1,
        source_f=source_f,
        dirichlet_tag=BTAG_NONE,
        neumann_tag=BTAG_NONE,
        radiation_tag=BTAG_ALL,
        flux_type="upwind"
    )

    fields = flat_obj_array(
        dcoll.zeros(actx),
        [dcoll.zeros(actx) for i in range(dcoll.dim)]
    )

    dt = actx.to_numpy(
        2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields))

    wave_op.check_bc_coverage(local_mesh)

    from logpyle import LogManager, \
            add_general_quantities, \
            add_run_info
    log_filename = None
    # NOTE: LogManager hangs when using a file on a shared directory.
    # log_filename = "grudge_log.dat"
    logmgr = LogManager(log_filename, "w", comm)
    add_run_info(logmgr)
    add_general_quantities(logmgr)

    def rhs(t, w):
        return wave_op.operator(t, w)

    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    final_t = 4
    nsteps = int(final_t/dt)
    logger.info("[%04d] dt %.5e nsteps %4d", i_local_rank, dt, nsteps)

    step = 0

    def norm(u):
        return op.norm(dcoll, u, 2)

    from time import time
    t_last_step = time()

    logmgr.tick_before()
    for event in dt_stepper.run(t_end=final_t):
        if isinstance(event, dt_stepper.StateComputed):
            assert event.component_id == "w"

            step += 1
            logger.info("[%04d] t = %.5e |u| = %.5e ellapsed %.5e",
                        step, event.t,
                        norm(u=event.state_component[0]),
                        time() - t_last_step)

            t_last_step = time()
            logmgr.tick_after()
            logmgr.tick_before()

    logmgr.tick_after()
    logmgr.close()
    logger.info("Rank %d exiting", i_local_rank)
예제 #4
0
def main(ctx_factory,
         dim=2,
         order=4,
         use_quad=False,
         visualize=False,
         flux_type="upwind"):
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
        force_device_scalars=True,
    )

    # {{{ parameters

    # domain [0, d]^dim
    d = 1.0
    # number of points in each dimension
    npoints = 25

    # final time
    final_time = 1

    if use_quad:
        qtag = dof_desc.DISCR_TAG_QUAD
    else:
        qtag = None

    # }}}

    # {{{ discretization

    from meshmode.mesh.generation import generate_regular_rect_mesh
    mesh = generate_regular_rect_mesh(a=(0, ) * dim,
                                      b=(d, ) * dim,
                                      npoints_per_axis=(npoints, ) * dim,
                                      order=order)

    from meshmode.discretization.poly_element import \
            QuadratureSimplexGroupFactory

    if use_quad:
        discr_tag_to_group_factory = {
            qtag: QuadratureSimplexGroupFactory(order=4 * order)
        }
    else:
        discr_tag_to_group_factory = {}

    from grudge import DiscretizationCollection

    dcoll = DiscretizationCollection(
        actx,
        mesh,
        order=order,
        discr_tag_to_group_factory=discr_tag_to_group_factory)

    # }}}

    # {{{ advection operator

    # gaussian parameters

    def f_halfcircle(x):
        source_center = np.array([d / 2, d / 2, d / 2])[:dim]
        dist = x - source_center
        return ((0.5 + 0.5 * actx.np.tanh(500 *
                                          (-np.dot(dist, dist) + 0.4**2))) *
                (0.5 + 0.5 * actx.np.tanh(500 * (dist[0]))))

    def zero_inflow_bc(dtag, t=0):
        dd = dof_desc.DOFDesc(dtag, qtag)
        return dcoll.discr_from_dd(dd).zeros(actx)

    from grudge.models.advection import VariableCoefficientAdvectionOperator

    x = thaw(dcoll.nodes(), actx)

    # velocity field
    if dim == 1:
        c = x
    else:
        # solid body rotation
        c = flat_obj_array(np.pi * (d / 2 - x[1]), np.pi * (x[0] - d / 2),
                           0)[:dim]

    adv_operator = VariableCoefficientAdvectionOperator(
        dcoll,
        c,
        inflow_u=lambda t: zero_inflow_bc(BTAG_ALL, t),
        quad_tag=qtag,
        flux_type=flux_type)

    u = f_halfcircle(x)

    def rhs(t, u):
        return adv_operator.operator(t, u)

    dt = actx.to_numpy(
        adv_operator.estimate_rk4_timestep(actx, dcoll, fields=u))

    logger.info("Timestep size: %g", dt)

    # }}}

    # {{{ time stepping

    from grudge.shortcuts import set_up_rk4
    dt_stepper = set_up_rk4("u", dt, u, rhs)
    plot = Plotter(actx, dcoll, order, visualize=visualize, ylim=[-0.1, 1.1])

    step = 0
    for event in dt_stepper.run(t_end=final_time):
        if not isinstance(event, dt_stepper.StateComputed):
            continue

        if step % 10 == 0:
            norm_u = actx.to_numpy(op.norm(dcoll, event.state_component, 2))
            plot(event, "fld-var-velocity-%04d" % step)

        step += 1
        logger.info("[%04d] t = %.5f |u| = %.5e", step, event.t, norm_u)

        # NOTE: These are here to ensure the solution is bounded for the
        # time interval specified
        assert norm_u < 1
예제 #5
0
def main(ctx_factory, dim=2, order=4, use_quad=False, visualize=False):
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
        force_device_scalars=True,
    )

    # {{{ parameters

    # sphere radius
    radius = 1.0
    # sphere resolution
    resolution = 64 if dim == 2 else 1

    # final time
    final_time = np.pi

    # flux
    flux_type = "lf"

    # }}}

    # {{{ discretization

    if dim == 2:
        from meshmode.mesh.generation import make_curve_mesh, ellipse
        mesh = make_curve_mesh(
                lambda t: radius * ellipse(1.0, t),
                np.linspace(0.0, 1.0, resolution + 1),
                order)
    elif dim == 3:
        from meshmode.mesh.generation import generate_icosphere
        mesh = generate_icosphere(radius, order=4 * order,
                uniform_refinement_rounds=resolution)
    else:
        raise ValueError("unsupported dimension")

    discr_tag_to_group_factory = {}
    if use_quad:
        qtag = dof_desc.DISCR_TAG_QUAD
    else:
        qtag = None

    from meshmode.discretization.poly_element import \
            default_simplex_group_factory, \
            QuadratureSimplexGroupFactory

    discr_tag_to_group_factory[dof_desc.DISCR_TAG_BASE] = \
        default_simplex_group_factory(base_dim=dim-1, order=order)

    if use_quad:
        discr_tag_to_group_factory[qtag] = \
            QuadratureSimplexGroupFactory(order=4*order)

    from grudge import DiscretizationCollection

    dcoll = DiscretizationCollection(
        actx, mesh,
        discr_tag_to_group_factory=discr_tag_to_group_factory
    )

    volume_discr = dcoll.discr_from_dd(dof_desc.DD_VOLUME)
    logger.info("ndofs:     %d", volume_discr.ndofs)
    logger.info("nelements: %d", volume_discr.mesh.nelements)

    # }}}

    # {{{ Surface advection operator

    # velocity field
    x = thaw(dcoll.nodes(), actx)
    c = make_obj_array([-x[1], x[0], 0.0])[:dim]

    def f_initial_condition(x):
        return x[0]

    from grudge.models.advection import SurfaceAdvectionOperator
    adv_operator = SurfaceAdvectionOperator(
        dcoll,
        c,
        flux_type=flux_type,
        quad_tag=qtag
    )

    u0 = f_initial_condition(x)

    def rhs(t, u):
        return adv_operator.operator(t, u)

    # check velocity is tangential
    from grudge.geometry import normal

    surf_normal = normal(actx, dcoll, dd=dof_desc.DD_VOLUME)

    error = op.norm(dcoll, c.dot(surf_normal), 2)
    logger.info("u_dot_n:   %.5e", error)

    # }}}

    # {{{ time stepping

    # FIXME: dt estimate is not necessarily valid for surfaces
    dt = actx.to_numpy(
        0.45 * adv_operator.estimate_rk4_timestep(actx, dcoll, fields=u0))
    nsteps = int(final_time // dt) + 1

    logger.info("dt:        %.5e", dt)
    logger.info("nsteps:    %d", nsteps)

    from grudge.shortcuts import set_up_rk4
    dt_stepper = set_up_rk4("u", dt, u0, rhs)
    plot = Plotter(actx, dcoll, order, visualize=visualize)

    norm_u = actx.to_numpy(op.norm(dcoll, u0, 2))

    step = 0

    event = dt_stepper.StateComputed(0.0, 0, 0, u0)
    plot(event, "fld-surface-%04d" % 0)

    if visualize and dim == 3:
        from grudge.shortcuts import make_visualizer
        vis = make_visualizer(dcoll)
        vis.write_vtk_file(
            "fld-surface-velocity.vtu",
            [
                ("u", c),
                ("n", surf_normal)
            ],
            overwrite=True
        )

        df = dof_desc.DOFDesc(FACE_RESTR_INTERIOR)
        face_discr = dcoll.discr_from_dd(df)
        face_normal = thaw(dcoll.normal(dd=df), actx)

        from meshmode.discretization.visualization import make_visualizer
        vis = make_visualizer(actx, face_discr)
        vis.write_vtk_file("fld-surface-face-normals.vtu", [
            ("n", face_normal)
            ], overwrite=True)

    for event in dt_stepper.run(t_end=final_time, max_steps=nsteps + 1):
        if not isinstance(event, dt_stepper.StateComputed):
            continue

        step += 1
        if step % 10 == 0:
            norm_u = actx.to_numpy(op.norm(dcoll, event.state_component, 2))
            plot(event, "fld-surface-%04d" % step)

        logger.info("[%04d] t = %.5f |u| = %.5e", step, event.t, norm_u)

        # NOTE: These are here to ensure the solution is bounded for the
        # time interval specified
        assert norm_u < 3
예제 #6
0
파일: weak.py 프로젝트: nchristensen/grudge
def main(ctx_factory, dim=2, order=4, visualize=False):
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
        force_device_scalars=True,
    )

    # {{{ parameters

    # domain [-d/2, d/2]^dim
    d = 1.0
    # number of points in each dimension
    npoints = 20

    # final time
    final_time = 1.0

    # velocity field
    c = np.array([0.5] * dim)
    norm_c = la.norm(c)

    # flux
    flux_type = "central"

    # }}}

    # {{{ discretization

    from meshmode.mesh.generation import generate_box_mesh
    mesh = generate_box_mesh(
        [np.linspace(-d / 2, d / 2, npoints) for _ in range(dim)], order=order)

    from grudge import DiscretizationCollection

    dcoll = DiscretizationCollection(actx, mesh, order=order)

    # }}}

    # {{{ weak advection operator

    def f(x):
        return actx.np.sin(3 * x)

    def u_analytic(x, t=0):
        return f(-np.dot(c, x) / norm_c + t * norm_c)

    from grudge.models.advection import WeakAdvectionOperator

    adv_operator = WeakAdvectionOperator(
        dcoll,
        c,
        inflow_u=lambda t: u_analytic(thaw(dcoll.nodes(dd=BTAG_ALL), actx),
                                      t=t),
        flux_type=flux_type)

    nodes = thaw(dcoll.nodes(), actx)
    u = u_analytic(nodes, t=0)

    def rhs(t, u):
        return adv_operator.operator(t, u)

    dt = actx.to_numpy(
        adv_operator.estimate_rk4_timestep(actx, dcoll, fields=u))

    logger.info("Timestep size: %g", dt)

    # }}}

    # {{{ time stepping

    from grudge.shortcuts import set_up_rk4
    dt_stepper = set_up_rk4("u", dt, u, rhs)
    plot = Plotter(actx, dcoll, order, visualize=visualize, ylim=[-1.1, 1.1])

    step = 0
    norm_u = 0.0
    for event in dt_stepper.run(t_end=final_time):
        if not isinstance(event, dt_stepper.StateComputed):
            continue

        if step % 10 == 0:
            norm_u = actx.to_numpy(op.norm(dcoll, event.state_component, 2))
            plot(event, "fld-weak-%04d" % step)

        step += 1
        logger.info("[%04d] t = %.5f |u| = %.5e", step, event.t, norm_u)

        # NOTE: These are here to ensure the solution is bounded for the
        # time interval specified
        assert norm_u < 1
예제 #7
0
def main(ctx_factory, dim=2, order=4, visualize=False):
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
        force_device_scalars=True,
    )

    comm = MPI.COMM_WORLD
    num_parts = comm.Get_size()

    from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis
    mesh_dist = MPIMeshDistributor(comm)

    if mesh_dist.is_mananger_rank():
        from meshmode.mesh.generation import generate_regular_rect_mesh
        mesh = generate_regular_rect_mesh(
                a=(-0.5,)*dim,
                b=(0.5,)*dim,
                nelements_per_axis=(16,)*dim)

        logger.info("%d elements", mesh.nelements)

        part_per_element = get_partition_by_pymetis(mesh, num_parts)

        local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts)

        del mesh

    else:
        local_mesh = mesh_dist.receive_mesh_part()

    dcoll = DiscretizationCollection(actx, local_mesh, order=order,
            mpi_communicator=comm)

    def source_f(actx, dcoll, t=0):
        source_center = np.array([0.1, 0.22, 0.33])[:dcoll.dim]
        source_width = 0.05
        source_omega = 3
        nodes = thaw(dcoll.nodes(), actx)
        source_center_dist = flat_obj_array(
            [nodes[i] - source_center[i] for i in range(dcoll.dim)]
        )
        return (
            np.sin(source_omega*t)
            * actx.np.exp(
                -np.dot(source_center_dist, source_center_dist)
                / source_width**2
            )
        )

    from grudge.models.wave import WeakWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE

    wave_op = WeakWaveOperator(
        dcoll,
        0.1,
        source_f=source_f,
        dirichlet_tag=BTAG_NONE,
        neumann_tag=BTAG_NONE,
        radiation_tag=BTAG_ALL,
        flux_type="upwind"
    )

    fields = flat_obj_array(
        dcoll.zeros(actx),
        [dcoll.zeros(actx) for i in range(dcoll.dim)]
    )

    dt = actx.to_numpy(
        2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields))

    wave_op.check_bc_coverage(local_mesh)

    def rhs(t, w):
        return wave_op.operator(t, w)

    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    final_t = 10
    nsteps = int(final_t/dt) + 1

    if comm.rank == 0:
        logger.info("dt=%g nsteps=%d", dt, nsteps)

    from grudge.shortcuts import make_visualizer
    vis = make_visualizer(dcoll)

    step = 0

    def norm(u):
        return op.norm(dcoll, u, 2)

    from time import time
    t_last_step = time()

    if visualize:
        u = fields[0]
        v = fields[1:]
        vis.write_parallel_vtk_file(
            comm,
            f"fld-wave-min-mpi-{{rank:03d}}-{step:04d}.vtu",
            [
                ("u", u),
                ("v", v),
            ]
        )

    for event in dt_stepper.run(t_end=final_t):
        if isinstance(event, dt_stepper.StateComputed):
            assert event.component_id == "w"

            step += 1
            l2norm = norm(u=event.state_component[0])

            if step % 10 == 0:
                if comm.rank == 0:
                    logger.info(f"step: {step} "
                                f"t: {time()-t_last_step} "
                                f"L2: {l2norm}")
                if visualize:
                    vis.write_parallel_vtk_file(
                        comm,
                        f"fld-wave-min-mpi-{{rank:03d}}-{step:04d}.vtu",
                        [
                            ("u", event.state_component[0]),
                            ("v", event.state_component[1:]),
                        ]
                    )
            t_last_step = time()

            # NOTE: These are here to ensure the solution is bounded for the
            # time interval specified
            assert l2norm < 1
예제 #8
0
def main(ctx_factory, dim=2, order=4, visualize=False):
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(
        queue,
        allocator=cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue)),
        force_device_scalars=True,
    )

    from meshmode.mesh.generation import generate_regular_rect_mesh
    mesh = generate_regular_rect_mesh(
            a=(-0.5,)*dim,
            b=(0.5,)*dim,
            nelements_per_axis=(20,)*dim)

    dcoll = DiscretizationCollection(actx, mesh, order=order)

    def source_f(actx, dcoll, t=0):
        source_center = np.array([0.1, 0.22, 0.33])[:dcoll.dim]
        source_width = 0.05
        source_omega = 3
        nodes = thaw(dcoll.nodes(), actx)
        source_center_dist = flat_obj_array(
            [nodes[i] - source_center[i] for i in range(dcoll.dim)]
        )
        return (
            np.sin(source_omega*t)
            * actx.np.exp(
                -np.dot(source_center_dist, source_center_dist)
                / source_width**2
            )
        )

    x = thaw(dcoll.nodes(), actx)
    ones = dcoll.zeros(actx) + 1
    c = actx.np.where(np.dot(x, x) < 0.15, 0.1 * ones, 0.2 * ones)

    from grudge.models.wave import VariableCoefficientWeakWaveOperator
    from meshmode.mesh import BTAG_ALL, BTAG_NONE

    wave_op = VariableCoefficientWeakWaveOperator(
        dcoll,
        c,
        source_f=source_f,
        dirichlet_tag=BTAG_NONE,
        neumann_tag=BTAG_NONE,
        radiation_tag=BTAG_ALL,
        flux_type="upwind"
    )

    fields = flat_obj_array(
        dcoll.zeros(actx),
        [dcoll.zeros(actx) for i in range(dcoll.dim)]
    )

    wave_op.check_bc_coverage(mesh)

    def rhs(t, w):
        return wave_op.operator(t, w)

    dt = actx.to_numpy(
        2/3 * wave_op.estimate_rk4_timestep(actx, dcoll, fields=fields))
    dt_stepper = set_up_rk4("w", dt, fields, rhs)

    final_t = 1
    nsteps = int(final_t/dt) + 1

    logger.info("dt=%g nsteps=%d", dt, nsteps)

    from grudge.shortcuts import make_visualizer
    vis = make_visualizer(dcoll)

    step = 0

    def norm(u):
        return op.norm(dcoll, u, 2)

    from time import time
    t_last_step = time()

    if visualize:
        u = fields[0]
        v = fields[1:]
        vis.write_vtk_file(
            f"fld-var-propogation-speed-{step:04d}.vtu",
            [
                ("u", u),
                ("v", v),
                ("c", c),
            ]
        )

    for event in dt_stepper.run(t_end=final_t):
        if isinstance(event, dt_stepper.StateComputed):
            assert event.component_id == "w"

            step += 1

            if step % 10 == 0:
                logger.info(f"step: {step} t: {time()-t_last_step} "
                            f"L2: {norm(u=event.state_component[0])}")
                if visualize:
                    vis.write_vtk_file(
                        f"fld-var-propogation-speed-{step:04d}.vtu",
                        [
                            ("u", event.state_component[0]),
                            ("v", event.state_component[1:]),
                            ("c", c),
                        ]
                    )
            t_last_step = time()

            # NOTE: These are here to ensure the solution is bounded for the
            # time interval specified
            assert norm(u=event.state_component[0]) < 1