def test_prolong_vector(tmpdir):
    '''Inter-grid kernel that is passed field vectors.

    Generates the distributed-memory PSy layer for the
    22.4_intergrid_prolong_vec.f90 example, checks that it compiles and
    that each component of the field vectors is declared, given a proxy,
    halo-exchanged and flagged dirty/clean individually.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.

    '''
    alg_file = os.path.join(BASE_PATH, "22.4_intergrid_prolong_vec.f90")
    _, invoke_info = parse(alg_file, api=API)
    factory = PSyFactory(API, distributed_memory=True)
    psy = factory.create(invoke_info)
    generated = str(psy.gen)
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    # Both the field vector and its proxy must be declared as arrays
    for declaration in ("TYPE(field_type), intent(inout) :: field1(3)",
                        "TYPE(field_proxy_type) field1_proxy(3)"):
        assert declaration in generated

    # A field vector must never be dereferenced without an index
    for unindexed in (" field1%", " field2%"):
        assert unindexed not in generated

    kernel_args = (
        "ncpc_field1_field2, ncell_field1, field1_proxy(1)%data, "
        "field1_proxy(2)%data, field1_proxy(3)%data, field2_proxy(1)%data,"
        " field2_proxy(2)%data, field2_proxy(3)%data, ndf_w1")
    assert kernel_args in generated

    # Templates that must appear once for every vector component
    halo_swap = (" IF (field2_proxy({0})%is_dirty(depth=1)) THEN\n"
                 " CALL field2_proxy({0})%halo_exchange(depth=1)\n"
                 " END IF \n")
    per_component = ("field1_proxy({0}) = field1({0})%get_proxy()",
                     "CALL field1_proxy({0})%set_dirty()",
                     "CALL field1_proxy({0})%set_clean(1)")
    for component in range(1, 4):
        assert halo_swap.format(component) in generated
        for template in per_component:
            assert template.format(component) in generated
def test_2kern_trans(tmpdir, monkeypatch):
    ''' Check that we generate correct code when we transform two kernels
    within a single invoke.

    The kernel-output directory held by the Config object is blanked and
    the working directory is switched to ``tmpdir`` so that the
    transformed kernels are written there. The previous working
    directory is restored when the test finishes, whether or not an
    assertion has failed.

    :param tmpdir: temporary directory used as the working directory and
                   passed to Dynamo0p3Build.
    :param monkeypatch: used to overwrite the ``_kernel_output_dir``
                        attribute of the Config object.

    '''
    # Ensure kernel-output directory is uninitialised
    config = Config.get()
    monkeypatch.setattr(config, "_kernel_output_dir", "")
    # Change to temp dir (so kernel written there)
    old_cwd = tmpdir.chdir()
    try:
        psy, invoke = get_invoke("4.5.2_multikernel_invokes.f90",
                                 api="dynamo0.3", idx=0)
        sched = invoke.schedule
        kernels = sched.walk(Kern)
        assert len(kernels) == 5
        rtrans = ACCRoutineTrans()
        _, _ = rtrans.apply(kernels[1])
        _, _ = rtrans.apply(kernels[2])
        # Generate the code (this triggers the generation of new kernels)
        code = str(psy.gen).lower()
        # Find the tags added to the kernel/module names
        for match in re.finditer('use testkern_any_space_2(.+?)_mod', code):
            tag = match.group(1)
            assert ("use testkern_any_space_2{0}_mod, only: "
                    "testkern_any_space_2{0}_code".format(tag) in code)
            assert "call testkern_any_space_2{0}_code(".format(tag) in code
            assert os.path.isfile(
                os.path.join(str(tmpdir),
                             "testkern_any_space_2{0}_mod.f90".format(tag)))
        # Neither of the original (untagged) names may be left behind
        assert "use testkern_any_space_2_mod, only" not in code
        assert "call testkern_any_space_2_code(" not in code
        assert Dynamo0p3Build(tmpdir).code_compiles(psy)
    finally:
        # Restore the working directory even when an assertion fails,
        # otherwise the process would be left inside tmpdir
        old_cwd.chdir()
def test_2kern_trans(kernel_outputdir):
    ''' Check that we generate correct code when we transform two kernels
    within a single invoke.

    Two of the five kernels in the invoke have the number of layers
    fixed to 100. Each transformed kernel must then be used and called
    under its new, tagged name and the corresponding module file must
    exist in ``kernel_outputdir`` and contain the constant.

    :param kernel_outputdir: directory in which the transformed kernel
                             files are looked up; also passed to
                             Dynamo0p3Build.

    '''
    psy, invoke = get_invoke("4.5.2_multikernel_invokes.f90",
                             api="dynamo0.3", idx=0)
    sched = invoke.schedule
    kernels = sched.walk(Kern)
    assert len(kernels) == 5
    ktrans = Dynamo0p3KernelConstTrans()
    _, _ = ktrans.apply(kernels[1], number_of_layers=100)
    _, _ = ktrans.apply(kernels[2], number_of_layers=100)
    # Generate the code (this triggers the generation of new kernels)
    code = str(psy.gen).lower()
    # Find the tags added to the kernel/module names
    for match in re.finditer('use testkern_any_space_2(.+?)_mod', code):
        tag = match.group(1)
        assert ("use testkern_any_space_2{0}_mod, only: "
                "testkern_any_space_2{0}_code".format(tag) in code)
        assert "call testkern_any_space_2{0}_code(".format(tag) in code
        filepath = os.path.join(str(kernel_outputdir),
                                "testkern_any_space_2{0}_mod.f90".format(tag))
        assert os.path.isfile(filepath)
        # Read through a context manager so the file handle is closed
        # straight away instead of being left to the garbage collector
        with open(filepath) as kernel_file:
            kernel_source = kernel_file.read()
        assert "nlayers = 100" in kernel_source
    # Neither of the original (untagged) names may be left behind
    assert "use testkern_any_space_2_mod, only" not in code
    assert "call testkern_any_space_2_code(" not in code
    assert Dynamo0p3Build(kernel_outputdir).code_compiles(psy)
def test_1kern_trans(tmpdir, monkeypatch):
    ''' Check that we generate the correct code when an invoke contains
    the same kernel more than once but only one of them is transformed.

    The kernel-output directory held by the Config object is blanked and
    the working directory is switched to ``tmpdir`` so that the
    transformed kernel is written there. The previous working directory
    is restored when the test finishes, whether or not an assertion has
    failed.

    :param tmpdir: temporary directory used as the working directory and
                   passed to Dynamo0p3Build.
    :param monkeypatch: used to overwrite the ``_kernel_output_dir``
                        attribute of the Config object.

    '''
    # Ensure kernel-output directory is uninitialised
    config = Config.get()
    monkeypatch.setattr(config, "_kernel_output_dir", "")
    # Change to temp dir (so kernel written there)
    old_cwd = tmpdir.chdir()
    try:
        psy, invoke = get_invoke("4_multikernel_invokes.f90",
                                 api="dynamo0.3", idx=0)
        sched = invoke.schedule
        kernels = sched.coded_kernels()
        # We will transform the second kernel but not the first
        kern = kernels[1]
        rtrans = ACCRoutineTrans()
        _, _ = rtrans.apply(kern)
        # Generate the code (this triggers the generation of a new kernel)
        code = str(psy.gen).lower()
        tag = re.search('use testkern(.+?)_mod', code).group(1)
        # We should have a USE for the original kernel and a USE for the
        # new one
        assert ("use testkern{0}_mod, only: testkern{0}_code".format(tag)
                in code)
        assert "use testkern, only: testkern_code" in code
        # Similarly, we should have calls to both the original and new
        # kernels
        assert "call testkern_code(" in code
        assert "call testkern{0}_code(".format(tag) in code
        # The untransformed kernel is the first one so must be called first
        first = code.find("call testkern_code(")
        second = code.find("call testkern{0}_code(".format(tag))
        assert first < second
        assert Dynamo0p3Build(tmpdir).code_compiles(psy)
    finally:
        # Restore the working directory even when an assertion fails,
        # otherwise the process would be left inside tmpdir
        old_cwd.chdir()
def test_1kern_trans(kernel_outputdir):
    '''Same kernel used twice in an invoke, only one use transformed.

    The second coded kernel of the invoke has ACCRoutineTrans applied to
    it. The generated code must then USE and CALL both the original
    kernel and the new, tagged one, with the call to the original coming
    first.

    :param kernel_outputdir: directory passed to Dynamo0p3Build.

    '''
    psy, invoke = get_invoke("4_multikernel_invokes.f90",
                             api="dynamo0.3", idx=0)
    # Only the second kernel is transformed; the first is left alone
    target = invoke.schedule.coded_kernels()[1]
    _, _ = ACCRoutineTrans().apply(target)

    # Generating the code is what triggers creation of the new kernel
    code = str(psy.gen).lower()
    tag = re.search('use testkern(.+?)_mod', code).group(1)

    new_use = "use testkern{0}_mod, only: testkern{0}_code".format(tag)
    orig_call = "call testkern_code("
    new_call = "call testkern{0}_code(".format(tag)

    # USE statements for the transformed and for the original kernel
    assert new_use in code
    assert "use testkern, only: testkern_code" in code
    # Calls to the original and to the transformed kernel...
    assert orig_call in code
    assert new_call in code
    # ...in that order
    assert code.find(orig_call) < code.find(new_call)

    assert Dynamo0p3Build(kernel_outputdir).code_compiles(psy)
def test_gh_inc_nohex_1(tmpdir, monkeypatch):
    '''With COMPUTE_ANNEXED_DOFS set to True, a gh_inc access to a field
    in a kernel (iterating to the l1 halo) needs no halo exchange when
    the previous writer is known and iterates over dofs to nannexed, to
    halo(1) or to the maximum halo depth.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.
    :param monkeypatch: used to force ``_compute_annexed_dofs`` to True.

    '''
    # Force COMPUTE_ANNEXED_DOFS on
    api_config = Config.get().api_conf(API)
    monkeypatch.setattr(api_config, "_compute_annexed_dofs", True)

    # Parse the algorithm file and get hold of the PSy schedule
    alg_file = os.path.join(BASE_PATH, "14.12_halo_wdofs_to_inc.f90")
    _, info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=True).create(info)
    schedule = psy.invokes.invoke_list[0].schedule

    def check_schedule(sched):
        '''Check that the schedule consists of exactly (loop, halo
        exchange of f2, loop), i.e. that no halo exchange has been added
        for the write-to-gh_inc dependence.

        :param sched: a dynamo0.3 API schedule object
        :type sched: :py:class:`psyclone.dynamo0p3.DynInvokeSchedule`.

        '''
        assert len(sched.children) == 3
        first_loop, exchange, second_loop = sched.children
        assert isinstance(first_loop, DynLoop)
        assert isinstance(exchange, DynHaloExchange)
        assert exchange.field.name == "f2"
        assert exchange.required() == (True, False)
        assert isinstance(second_loop, DynLoop)

    # Untransformed: the 1st loop iterates over dofs up to nannexed
    assert schedule.children[0].upper_bound_name == "nannexed"
    check_schedule(schedule)

    # Compilation is only checked at this point; compilation of
    # redundant computation is checked separately
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    rc_trans = Dynamo0p3RedundantComputationTrans()

    # 1st loop now iterates over dofs into the level-1 halo
    rc_trans.apply(schedule.children[0], depth=1)
    assert schedule.children[0].upper_bound_name == "dof_halo"
    assert schedule.children[0].upper_bound_halo_depth == 1
    check_schedule(schedule)

    # 1st loop now iterates over dofs to the maximum halo depth
    rc_trans.apply(schedule.children[0])
    assert schedule.children[0].upper_bound_name == "dof_halo"
    assert not schedule.children[0].upper_bound_halo_depth
    check_schedule(schedule)
def test_field_qr_deref(tmpdir):
    '''Quadrature object supplied as a component of a derived type.

    For both settings of distributed memory, the PSy layer generated
    from 1.1.1_single_invoke_qr_deref.f90 must compile and must take the
    quadrature as a plain ``qr_data`` argument.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.

    '''
    alg_file = os.path.join(BASE_PATH, "1.1.1_single_invoke_qr_deref.f90")
    _, invoke_info = parse(alg_file, api="dynamo0.3")

    # Text that is expected irrespective of the distributed-memory flag
    subroutine_stmt = (
        " SUBROUTINE invoke_0_testkern_qr_type(f1, f2, m1, a, m2, istp,"
        " qr_data)\n")
    qr_decl = "TYPE(quadrature_xyoz_type), intent(in) :: qr_data"

    for dist_mem in (True, False):
        factory = PSyFactory("dynamo0.3", distributed_memory=dist_mem)
        psy = factory.create(invoke_info)
        assert Dynamo0p3Build(tmpdir).code_compiles(psy)
        gen = str(psy.gen)
        print(gen)
        assert subroutine_stmt in gen
        assert qr_decl in gen
def infra_compile(tmpdir_factory, request):
    '''A per-session initialisation function that sets the compilation
    flags in the Compile class based on command line options for
    --compile, --compileopencl, --f90, --f90flags. Then makes sure that
    the infrastructure files for the dynamo0p3 and gocean1p0 APIs are
    compiled (if compilation was enabled).

    :param tmpdir_factory: factory whose ``mktemp`` method is used to \
        create one temporary directory per API.
    :param request: object whose ``config`` attribute is handed to \
        ``Compile.store_compilation_flags``.

    '''
    # NOTE(review): the imports are done inside the function in the
    # original; presumably this is deliberate (the flags are stored
    # before the build modules are imported) - confirm before moving
    # them to module level.
    from psyclone_test_utils import Compile
    Compile.store_compilation_flags(request.config)

    from dynamo0p3_build import Dynamo0p3Build
    # Create a temporary directory to store the compiled files.
    # Note that this directory is unique even if compiled in
    # parallel, i.e. each process has its own copy of the
    # compiled infrastructure file, which avoids the problem
    # of synchronisation between the processes.
    tmpdir = tmpdir_factory.mktemp('dynamo_wrapper')
    # This is the first instance created. This will trigger
    # compilation of the infrastructure files. The instance itself
    # is not needed afterwards so it is not kept.
    Dynamo0p3Build(tmpdir)

    # Same again for the gocean1p0 API, in a directory of its own
    from gocean1p0_build import GOcean1p0Build
    tmpdir = tmpdir_factory.mktemp('dl_esm_inf')
    GOcean1p0Build(tmpdir)
def test_restrict_prolong_chain(tmpdir, dist_mem):
    '''Single invoke holding a chain of prolongations and restrictions.

    Checks the mesh and loop-limit look-ups generated for the three
    fields (fld_f, fld_m, fld_c), that nothing is looked up twice and,
    depending on ``dist_mem``, either the halo exchanges placed around
    the kernel loops or the four serial kernel loops.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.
    :param dist_mem: whether to generate distributed-memory code.

    '''
    alg_file = os.path.join(BASE_PATH, "22.2_intergrid_3levels.f90")
    _, invoke_info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=dist_mem).create(invoke_info)
    output = str(psy.gen)
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    mesh_lookup = (
        " ! Look-up mesh objects and loop limits for inter-grid "
        "kernels\n"
        " !\n"
        " mesh_fld_f => fld_f_proxy%vspace%get_mesh()\n"
        " mesh_fld_m => fld_m_proxy%vspace%get_mesh()\n"
        " mmap_fld_f_fld_m => mesh_fld_m%get_mesh_map(mesh_fld_f)\n"
        " cell_map_fld_m => mmap_fld_f_fld_m%get_whole_cell_map()\n")
    assert mesh_lookup in output

    # Only the two ncell look-ups differ between the distributed-memory
    # and the serial case; everything in between is common
    if dist_mem:
        ncell_f = " ncell_fld_f = mesh_fld_f%get_last_halo_cell(depth=2)\n"
        ncell_m = " ncell_fld_m = mesh_fld_m%get_last_halo_cell(depth=2)\n"
    else:
        ncell_f = " ncell_fld_f = fld_f_proxy%vspace%get_ncell()\n"
        ncell_m = " ncell_fld_m = fld_m_proxy%vspace%get_ncell()\n"
    loop_limits = "".join([
        ncell_f,
        " ncpc_fld_f_fld_m = mmap_fld_f_fld_m%"
        "get_ntarget_cells_per_source_cell()\n"
        " mesh_fld_c => fld_c_proxy%vspace%get_mesh()\n"
        " mmap_fld_m_fld_c => mesh_fld_c%get_mesh_map(mesh_fld_m)\n"
        " cell_map_fld_c => mmap_fld_m_fld_c%get_whole_cell_map()\n",
        ncell_m,
        " ncpc_fld_m_fld_c = mmap_fld_m_fld_c%"
        "get_ntarget_cells_per_source_cell()\n"])
    assert loop_limits in output

    # None of the look-ups may have been generated more than once
    for looked_up_once in ("mesh_fld_m => fld_m_proxy%vspace%get_mesh",
                           "ncell_fld_m = ",
                           "ncell_fld_f = "):
        assert output.count(looked_up_once) == 1

    if not dist_mem:
        # Serial case: two prolongations followed by two restrictions
        serial_loops = (
            " DO cell=1,fld_c_proxy%vspace%get_ncell()\n"
            " !\n"
            " CALL prolong_kernel_code(nlayers, cell_map_fld_c(:,"
            "cell), ncpc_fld_m_fld_c, ncell_fld_m, fld_m_proxy%data, "
            "fld_c_proxy%data, ndf_w1, undf_w1, map_w1, undf_w2, "
            "map_w2(:,cell))\n"
            " END DO \n"
            " DO cell=1,fld_m_proxy%vspace%get_ncell()\n"
            " !\n"
            " CALL prolong_kernel_code(nlayers, cell_map_fld_m(:,"
            "cell), ncpc_fld_f_fld_m, ncell_fld_f, fld_f_proxy%data, "
            "fld_m_proxy%data, ndf_w1, undf_w1, map_w1, undf_w2, "
            "map_w2(:,cell))\n"
            " END DO \n"
            " DO cell=1,fld_m_proxy%vspace%get_ncell()\n"
            " !\n"
            " CALL restrict_kernel_code(nlayers, cell_map_fld_m(:,"
            "cell), ncpc_fld_f_fld_m, ncell_fld_f, fld_m_proxy%data, "
            "fld_f_proxy%data, undf_any_space_1_fld_m, "
            "map_any_space_1_fld_m(:,cell), ndf_any_space_2_fld_f, "
            "undf_any_space_2_fld_f, map_any_space_2_fld_f)\n"
            " END DO \n"
            " DO cell=1,fld_c_proxy%vspace%get_ncell()\n"
            " !\n"
            " CALL restrict_kernel_code(nlayers, cell_map_fld_c(:,"
            "cell), ncpc_fld_m_fld_c, ncell_fld_m, fld_c_proxy%data, "
            "fld_m_proxy%data, undf_any_space_1_fld_c, "
            "map_any_space_1_fld_c(:,cell), ndf_any_space_2_fld_m, "
            "undf_any_space_2_fld_m, map_any_space_2_fld_m)\n")
        assert serial_loops in output
        return

    # Distributed-memory case: check the halo handling around each loop
    halo_fragments = [
        # Have a potential halo exchange before 1st prolong
        (" IF (fld_c_proxy%is_dirty(depth=1)) THEN\n"
         " CALL fld_c_proxy%halo_exchange(depth=1)\n"
         " END IF \n"
         " !\n"
         " DO cell=1,mesh_fld_c%get_last_halo_cell(1)\n"),
        # Since we loop into L1 halo of the coarse mesh, the L1 halo
        # of the fine(r) mesh will now be clean. Therefore, no halo
        # swap before the next prolongation
        (" ! Set halos dirty/clean for fields modified in the "
         "above loop\n"
         " !\n"
         " CALL fld_m_proxy%set_dirty()\n"
         " CALL fld_m_proxy%set_clean(1)\n"
         " !\n"
         " DO cell=1,mesh_fld_m%get_last_halo_cell(1)\n"),
        # Again the L1 halo for fld_f will now be clean but for
        # restriction we need the L2 halo to be clean. There's a
        # set_clean(1) for fld_f because the above loop over the
        # coarser fld_m will go into the L2 halo of fld_f. However, it
        # is a continuous field so only the L1 halo will actually be
        # clean.
        (" CALL fld_f_proxy%set_dirty()\n"
         " CALL fld_f_proxy%set_clean(1)\n"
         " !\n"
         " CALL fld_f_proxy%halo_exchange(depth=2)\n"
         " !\n"
         " DO cell=1,mesh_fld_m%get_last_halo_cell(1)\n"
         " !\n"
         " CALL restrict_kernel_code"),
        # For the final restriction we need the L2 halo of fld_m to be
        # clean. There's no set_clean() call on fld_m because it is
        # only updated out to the L1 halo and it is a continuous field
        # so the shared dofs in the L1 halo will still be dirty.
        (" CALL fld_m_proxy%set_dirty()\n"
         " !\n"
         " CALL fld_m_proxy%halo_exchange(depth=2)\n"
         " !\n"
         " DO cell=1,mesh_fld_c%get_last_halo_cell(1)\n"
         " !\n"
         " CALL restrict_kernel_code")]
    for fragment in halo_fragments:
        assert fragment in output
def test_field_restrict(tmpdir, monkeypatch, annexed):
    '''Invoke containing a single restriction (read from the fine mesh,
    write to the coarse mesh).

    The generated code is checked with and without distributed memory.
    The ``annexed`` setting changes how many halo exchanges are expected
    in the distributed-memory case.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.
    :param monkeypatch: used to set ``_compute_annexed_dofs``.
    :param annexed: value given to ``_compute_annexed_dofs``.

    '''
    api_config = Config.get().api_conf("dynamo0.3")
    monkeypatch.setattr(api_config, "_compute_annexed_dofs", annexed)
    alg_file = os.path.join(BASE_PATH, "22.1_intergrid_restrict.f90")
    _, invoke_info = parse(alg_file, api=API)

    # --- Text that does not depend on the distributed-memory flag ---
    use_and_args = (
        " USE restrict_kernel_mod, ONLY: restrict_kernel_code\n"
        " USE mesh_map_mod, ONLY: mesh_map_type\n"
        " USE mesh_mod, ONLY: mesh_type\n"
        " TYPE(field_type), intent(inout) :: field1\n"
        " TYPE(field_type), intent(in) :: field2\n")
    local_decls = (
        " INTEGER nlayers\n"
        " TYPE(field_proxy_type) field1_proxy, field2_proxy\n"
        " INTEGER, pointer :: map_any_space_1_field1(:,:) => null(), "
        "map_any_space_2_field2(:,:) => null()\n"
        " INTEGER ndf_any_space_1_field1, undf_any_space_1_field1, "
        "ndf_any_space_2_field2, undf_any_space_2_field2\n"
        " INTEGER ncell_field2, ncpc_field2_field1\n"
        " INTEGER, pointer :: cell_map_field1(:,:) => null()\n"
        " TYPE(mesh_map_type), pointer :: mmap_field2_field1 => "
        "null()\n"
        " TYPE(mesh_type), pointer :: mesh_field2 => null()\n"
        " TYPE(mesh_type), pointer :: mesh_field1 => null()\n")
    mesh_lookup = (
        " !\n"
        " ! Look-up mesh objects and loop limits for inter-grid "
        "kernels\n"
        " !\n"
        " mesh_field2 => field2_proxy%vspace%get_mesh()\n"
        " mesh_field1 => field1_proxy%vspace%get_mesh()\n"
        " mmap_field2_field1 => mesh_field1%get_mesh_map("
        "mesh_field2)\n"
        " cell_map_field1 => mmap_field2_field1%"
        "get_whole_cell_map()\n")
    ncpc_and_dofmaps = (
        " ncpc_field2_field1 = mmap_field2_field1%"
        "get_ntarget_cells_per_source_cell()\n"
        " !\n"
        " ! Look-up dofmaps for each function space\n"
        " !\n"
        " map_any_space_1_field1 => field1_proxy%vspace%"
        "get_whole_dofmap()\n"
        " map_any_space_2_field2 => field2_proxy%vspace%"
        "get_whole_dofmap()\n")
    # We pass the whole dofmap for the fine mesh (we are reading from).
    # This is associated with the second kernel argument.
    kern_call = (
        " !\n"
        " CALL restrict_kernel_code(nlayers, "
        "cell_map_field1(:,cell), ncpc_field2_field1, ncell_field2, "
        "field1_proxy%data, field2_proxy%data, "
        "undf_any_space_1_field1, map_any_space_1_field1(:,cell), "
        "ndf_any_space_2_field2, undf_any_space_2_field2, "
        "map_any_space_2_field2)\n"
        " END DO \n"
        " !\n")

    # --- Pieces of the distributed-memory halo-exchange text ---
    comms_header = (" ! Call kernels and communication routines\n"
                    " !\n")
    field1_exchange = (" IF (field1_proxy%is_dirty(depth=1)) THEN\n"
                       " CALL field1_proxy%halo_exchange(depth=1)\n"
                       " END IF \n"
                       " !\n")
    field2_exchange_and_loop = (
        " IF (field2_proxy%is_dirty(depth=2)) THEN\n"
        " CALL field2_proxy%halo_exchange(depth=2)\n"
        " END IF \n"
        " !\n"
        " DO cell=1,mesh_field1%get_last_halo_cell(1)\n")

    for distmem in (False, True):
        psy = PSyFactory(API, distributed_memory=distmem).create(invoke_info)
        output = str(psy.gen)
        print(output)
        assert Dynamo0p3Build(tmpdir).code_compiles(psy)

        assert use_and_args in output
        assert local_decls in output

        # The look-up of the number of cells is the only part of the
        # initialisation that depends on distributed memory
        if distmem:
            ncell_lookup = (" ncell_field2 = mesh_field2%"
                            "get_last_halo_cell(depth=2)\n")
        else:
            ncell_lookup = (" ncell_field2 = field2_proxy%vspace%"
                            "get_ncell()\n")
        inits = mesh_lookup + ncell_lookup + ncpc_and_dofmaps
        assert inits in output

        if distmem:
            # We write out to the L1 halo on the coarse mesh which means
            # we require up-to-date values out to the L2 halo on the
            # fine. Since we are incrementing the coarse field we also
            # need up-to-date values for it in the L1 halo; that
            # exchange is only expected when annexed is not set.
            if annexed:
                halo_exchs = comms_header + field2_exchange_and_loop
            else:
                halo_exchs = (comms_header + field1_exchange +
                              field2_exchange_and_loop)
            assert halo_exchs in output

        assert kern_call in output

        if distmem:
            assert " CALL field1_proxy%set_dirty()\n" in output
def test_field_prolong(tmpdir):
    '''Invoke containing a kernel that performs a prolongation.

    The PSy layer is generated with and without distributed memory and
    is checked for the declarations, the mesh and loop-limit look-ups,
    the loop (plus halo exchange when distributed) and the kernel call.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.

    '''
    alg_file = os.path.join(BASE_PATH, "22.0_intergrid_prolong.f90")
    _, invoke_info = parse(alg_file, api=API)

    # --- Text that does not depend on the distributed-memory flag ---
    use_and_args = (
        " USE prolong_kernel_mod, ONLY: prolong_kernel_code\n"
        " USE mesh_map_mod, ONLY: mesh_map_type\n"
        " USE mesh_mod, ONLY: mesh_type\n"
        " TYPE(field_type), intent(inout) :: field1\n"
        " TYPE(field_type), intent(in) :: field2\n"
        " INTEGER cell\n")
    mesh_decls = (
        " INTEGER ncell_field1, ncpc_field1_field2\n"
        " INTEGER, pointer :: cell_map_field2(:,:) => null()\n"
        " TYPE(mesh_map_type), pointer :: "
        "mmap_field1_field2 => null()\n"
        " TYPE(mesh_type), pointer :: mesh_field2 => null()\n"
        " TYPE(mesh_type), pointer :: mesh_field1 => null()\n")
    mesh_lookup = (
        " ! Look-up mesh objects and loop limits for inter-grid "
        "kernels\n"
        " !\n"
        " mesh_field1 => field1_proxy%vspace%get_mesh()\n"
        " mesh_field2 => field2_proxy%vspace%get_mesh()\n"
        " mmap_field1_field2 => mesh_field2%get_mesh_map"
        "(mesh_field1)\n"
        " cell_map_field2 => mmap_field1_field2%"
        "get_whole_cell_map()\n")
    ncpc_lookup = (" ncpc_field1_field2 = mmap_field1_field2%"
                   "get_ntarget_cells_per_source_cell()\n")
    kern_call = (
        " CALL prolong_kernel_code(nlayers, "
        "cell_map_field2(:,cell), ncpc_field1_field2, ncell_field1, "
        "field1_proxy%data, field2_proxy%data, ndf_w1, undf_w1, map_w1, "
        "undf_w2, map_w2(:,cell))\n"
        " END DO \n")

    for distmem in (False, True):
        psy = PSyFactory(API, distributed_memory=distmem).create(invoke_info)
        gen_code = str(psy.gen)
        assert Dynamo0p3Build(tmpdir).code_compiles(psy)

        assert use_and_args in gen_code
        assert mesh_decls in gen_code

        # Only the ncell look-up depends on distributed memory
        if distmem:
            ncell_lookup = (" ncell_field1 = mesh_field1%get_last_halo_cell("
                            "depth=2)\n")
        else:
            ncell_lookup = " ncell_field1 = field1_proxy%vspace%get_ncell()\n"
        assert mesh_lookup + ncell_lookup + ncpc_lookup in gen_code

        if not distmem:
            assert "DO cell=1,field2_proxy%vspace%get_ncell()\n" in gen_code
        else:
            # We are writing to a continuous field on the fine mesh, we
            # only need to halo swap to depth one on the coarse.
            exchange_and_loop = (
                " IF (field2_proxy%is_dirty(depth=1)) THEN\n"
                " CALL field2_proxy%halo_exchange(depth=1)\n"
                " END IF \n"
                " !\n"
                " DO cell=1,mesh_field2%get_last_halo_cell(1)\n")
            assert exchange_and_loop in gen_code

        assert kern_call in gen_code

        if distmem:
            assert " CALL field1_proxy%set_dirty()\n" in gen_code
def test_field_xyoz(tmpdir):
    ''' Tests that a call, with a set of fields requiring XYoZ quadrature,
    produces correct code.

    The distributed-memory PSy layer generated from
    1.1.0_single_invoke_xyoz_qr.f90 is compiled and then compared, in
    three consecutive pieces, with the expected Fortran: the subroutine
    header and declarations, the initialisation section and finally the
    basis-function set-up together with the kernel loop.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.

    '''
    _, invoke_info = parse(os.path.join(BASE_PATH,
                                        "1.1.0_single_invoke_xyoz_qr.f90"),
                           api=API)
    psy = PSyFactory(API, distributed_memory=True).create(invoke_info)
    generated_code = str(psy.gen)
    # Printed so that the generated code is available in the captured
    # output when an assertion fails
    print(generated_code)
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)
    # 1) Subroutine statement, USE statements and all declarations
    output_decls = (
        " SUBROUTINE invoke_0_testkern_qr_type(f1, f2, m1, a, m2, istp,"
        " qr)\n"
        " USE testkern_qr, ONLY: testkern_qr_code\n"
        " USE quadrature_xyoz_mod, ONLY: quadrature_xyoz_type, "
        "quadrature_xyoz_proxy_type\n"
        " USE function_space_mod, ONLY: BASIS, DIFF_BASIS\n"
        " USE mesh_mod, ONLY: mesh_type\n"
        " REAL(KIND=r_def), intent(in) :: a\n"
        " INTEGER, intent(in) :: istp\n"
        " TYPE(field_type), intent(inout) :: f1\n"
        " TYPE(field_type), intent(in) :: f2, m1, m2\n"
        " TYPE(quadrature_xyoz_type), intent(in) :: qr\n"
        " INTEGER cell\n"
        " REAL(KIND=r_def), allocatable :: basis_w1_qr(:,:,:,:), "
        "diff_basis_w2_qr(:,:,:,:), basis_w3_qr(:,:,:,:), "
        "diff_basis_w3_qr(:,:,:,:)\n"
        " INTEGER dim_w1, diff_dim_w2, dim_w3, diff_dim_w3\n"
        " REAL(KIND=r_def), pointer :: weights_xy_qr(:) => null(), "
        "weights_z_qr(:) => null()\n"
        " INTEGER np_xy_qr, np_z_qr\n"
        " INTEGER nlayers\n"
        " TYPE(field_proxy_type) f1_proxy, f2_proxy, m1_proxy, m2_proxy\n"
        " TYPE(quadrature_xyoz_proxy_type) qr_proxy\n"
        " INTEGER, pointer :: map_w1(:,:) => null(), "
        "map_w2(:,:) => null(), map_w3(:,:) => null()\n"
        " INTEGER ndf_w1, undf_w1, ndf_w2, undf_w2, ndf_w3, undf_w3\n")
    assert output_decls in generated_code
    # 2) Initialisation of proxies, layers, mesh, dofmaps, dof counts
    #    and the quadrature variables
    init_output = (" !\n"
                   " ! Initialise field and/or operator proxies\n"
                   " !\n"
                   " f1_proxy = f1%get_proxy()\n"
                   " f2_proxy = f2%get_proxy()\n"
                   " m1_proxy = m1%get_proxy()\n"
                   " m2_proxy = m2%get_proxy()\n"
                   " !\n"
                   " ! Initialise number of layers\n"
                   " !\n"
                   " nlayers = f1_proxy%vspace%get_nlayers()\n"
                   " !\n"
                   " ! Create a mesh object\n"
                   " !\n"
                   " mesh => f1_proxy%vspace%get_mesh()\n"
                   " !\n"
                   " ! Look-up dofmaps for each function space\n"
                   " !\n"
                   " map_w1 => f1_proxy%vspace%get_whole_dofmap()\n"
                   " map_w2 => f2_proxy%vspace%get_whole_dofmap()\n"
                   " map_w3 => m2_proxy%vspace%get_whole_dofmap()\n"
                   " !\n"
                   " ! Initialise number of DoFs for w1\n"
                   " !\n"
                   " ndf_w1 = f1_proxy%vspace%get_ndf()\n"
                   " undf_w1 = f1_proxy%vspace%get_undf()\n"
                   " !\n"
                   " ! Initialise number of DoFs for w2\n"
                   " !\n"
                   " ndf_w2 = f2_proxy%vspace%get_ndf()\n"
                   " undf_w2 = f2_proxy%vspace%get_undf()\n"
                   " !\n"
                   " ! Initialise number of DoFs for w3\n"
                   " !\n"
                   " ndf_w3 = m2_proxy%vspace%get_ndf()\n"
                   " undf_w3 = m2_proxy%vspace%get_undf()\n"
                   " !\n"
                   " ! Look-up quadrature variables\n"
                   " !\n"
                   " qr_proxy = qr%get_quadrature_proxy()\n"
                   " np_xy_qr = qr_proxy%np_xy\n"
                   " np_z_qr = qr_proxy%np_z\n"
                   " weights_xy_qr => qr_proxy%weights_xy\n"
                   " weights_z_qr => qr_proxy%weights_z\n")
    assert init_output in generated_code
    # 3) Allocation and computation of the basis arrays, the halo
    #    exchanges, the kernel loop and the final clean-up
    compute_output = (
        " !\n"
        " ! Allocate basis/diff-basis arrays\n"
        " !\n"
        " dim_w1 = f1_proxy%vspace%get_dim_space()\n"
        " diff_dim_w2 = f2_proxy%vspace%get_dim_space_diff()\n"
        " dim_w3 = m2_proxy%vspace%get_dim_space()\n"
        " diff_dim_w3 = m2_proxy%vspace%get_dim_space_diff()\n"
        " ALLOCATE (basis_w1_qr(dim_w1, ndf_w1, np_xy_qr, np_z_qr))\n"
        " ALLOCATE (diff_basis_w2_qr(diff_dim_w2, ndf_w2, np_xy_qr, "
        "np_z_qr))\n"
        " ALLOCATE (basis_w3_qr(dim_w3, ndf_w3, np_xy_qr, np_z_qr))\n"
        " ALLOCATE (diff_basis_w3_qr(diff_dim_w3, ndf_w3, np_xy_qr, "
        "np_z_qr))\n"
        " !\n"
        " ! Compute basis/diff-basis arrays\n"
        " !\n"
        " CALL qr%compute_function(BASIS, f1_proxy%vspace, dim_w1, "
        "ndf_w1, basis_w1_qr)\n"
        " CALL qr%compute_function(DIFF_BASIS, f2_proxy%vspace, "
        "diff_dim_w2, ndf_w2, diff_basis_w2_qr)\n"
        " CALL qr%compute_function(BASIS, m2_proxy%vspace, dim_w3, "
        "ndf_w3, basis_w3_qr)\n"
        " CALL qr%compute_function(DIFF_BASIS, m2_proxy%vspace, "
        "diff_dim_w3, ndf_w3, diff_basis_w3_qr)\n"
        " !\n"
        " ! Call kernels and communication routines\n"
        " !\n"
        " IF (f2_proxy%is_dirty(depth=1)) THEN\n"
        " CALL f2_proxy%halo_exchange(depth=1)\n"
        " END IF \n"
        " !\n"
        " IF (m1_proxy%is_dirty(depth=1)) THEN\n"
        " CALL m1_proxy%halo_exchange(depth=1)\n"
        " END IF \n"
        " !\n"
        " IF (m2_proxy%is_dirty(depth=1)) THEN\n"
        " CALL m2_proxy%halo_exchange(depth=1)\n"
        " END IF \n"
        " !\n"
        " DO cell=1,mesh%get_last_halo_cell(1)\n"
        " !\n"
        " CALL testkern_qr_code(nlayers, f1_proxy%data, f2_proxy%data, "
        "m1_proxy%data, a, m2_proxy%data, istp, ndf_w1, undf_w1, "
        "map_w1(:,cell), basis_w1_qr, ndf_w2, undf_w2, map_w2(:,cell), "
        "diff_basis_w2_qr, ndf_w3, undf_w3, map_w3(:,cell), basis_w3_qr, "
        "diff_basis_w3_qr, np_xy_qr, np_z_qr, weights_xy_qr, weights_z_qr)\n"
        " END DO \n"
        " !\n"
        " ! Set halos dirty/clean for fields modified in the above loop\n"
        " !\n"
        " CALL f1_proxy%set_dirty()\n"
        " !\n"
        " !\n"
        " ! Deallocate basis arrays\n"
        " !\n"
        " DEALLOCATE (basis_w1_qr, basis_w3_qr, diff_basis_w2_qr, "
        "diff_basis_w3_qr)\n"
        " !\n"
        " END SUBROUTINE invoke_0_testkern_qr_type")
    assert compute_output in generated_code
def test_gh_inc_max(tmpdir, monkeypatch, annexed):
    '''Halo-exchange depth when a field has several read dependencies.

    Field f1 is read by a read-only reader and by a gh_inc reader. The
    depth of its halo exchange is checked as the loops are transformed
    to compute redundantly to increasing halo depths. ``annexed``
    changes how many halo exchanges there are and therefore where the
    nodes of interest sit in the schedule.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.
    :param monkeypatch: used to set ``_compute_annexed_dofs``.
    :param annexed: value given to ``_compute_annexed_dofs``.

    '''
    api_config = Config.get().api_conf(API)
    monkeypatch.setattr(api_config, "_compute_annexed_dofs", annexed)

    # Parse the algorithm file and get hold of the PSy schedule
    alg_file = os.path.join(BASE_PATH, "14.14_halo_inc_times3.f90")
    _, info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=True).create(info)
    schedule = psy.invokes.invoke_list[0].schedule
    rc_trans = Dynamo0p3RedundantComputationTrans()

    # Positions in the schedule of the f1 halo exchange and of the two
    # loops that get transformed
    if annexed:
        haloidx, loop1idx, loop2idx = 2, 3, 5
    else:
        haloidx, loop1idx, loop2idx = 4, 5, 7

    def check_f1_exchange(depth):
        '''Fetch the node currently at the halo-exchange position and
        check that it is a required halo exchange of f1 with the
        expected depth.

        :param str depth: the expected depth expression.

        '''
        haloex = schedule.children[haloidx]
        assert isinstance(haloex, DynHaloExchange)
        assert haloex.field.name == "f1"
        assert haloex.required() == (True, True)
        assert haloex._compute_halo_depth() == depth

    # f1 halo exchange should be depth 1 : max(1,0)
    check_f1_exchange("1")

    # f1 halo exchange should still be depth 1 : max(1,1)
    rc_trans.apply(schedule.children[loop2idx], depth=2)
    check_f1_exchange("1")

    # f1 halo exchange should be depth 2 : max(1,2)
    rc_trans.apply(schedule.children[loop2idx], depth=3)
    check_f1_exchange("2")

    # f1 halo exchange should be depth max(1,max-1)
    rc_trans.apply(schedule.children[loop2idx])
    check_f1_exchange("max(mesh%get_halo_depth()-1,1)")

    # Compilation is checked only here as this is the most complicated
    # case. (Note, compilation of redundant computation is checked
    # separately)
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    # f1 halo exchange should be depth max
    rc_trans.apply(schedule.children[loop1idx])
    check_f1_exchange("mesh%get_halo_depth()")
def test_gh_inc_nohex_4(tmpdir, monkeypatch):
    '''With COMPUTE_ANNEXED_DOFS set to False, a gh_inc access to a field
    in a kernel (iterating to the l1 halo) needs no halo exchange when
    the previous writer is known and iterates over cells to halo(1),
    halo(2) or the maximum halo depth. In addition, when the previous
    writer is itself a gh_inc access whose previous writer is unknown,
    that writer does need a halo exchange if it writes to halo(1), a
    speculative halo exchange to halo(n-1) if it iterates to halo(n) and
    a speculative halo exchange to halo(max_depth-1) if it iterates to
    the maximum halo depth.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.
    :param monkeypatch: used to force ``_compute_annexed_dofs`` to False.

    '''
    # Force COMPUTE_ANNEXED_DOFS off
    api_config = Config.get().api_conf(API)
    monkeypatch.setattr(api_config, "_compute_annexed_dofs", False)

    # Parse the algorithm file and get hold of the PSy schedule
    alg_file = os.path.join(BASE_PATH, "14.13_halo_inc_to_inc.f90")
    _, info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=True).create(info)
    schedule = psy.invokes.invoke_list[0].schedule

    def check(sched, f1depth, f2depth):
        '''Check that the schedule is (halo exchange of f1, halo
        exchange of f2, loop, loop) and that the two halo exchanges have
        the expected depths.

        :param sched: a dynamo0.3 API schedule object
        :type sched: :py:class:`psyclone.dynamo0p3.DynInvokeSchedule`.
        :param str f1depth: expected depth expression of the halo \
                            exchange associated with field f1
        :param str f2depth: expected depth expression of the halo \
                            exchange associated with field f2

        '''
        assert len(sched.children) == 4
        haloex1, haloex2, loop1, loop2 = sched.children
        expectations = ((haloex1, "f1", f1depth), (haloex2, "f2", f2depth))
        for exchange, field_name, depth in expectations:
            assert isinstance(exchange, DynHaloExchange)
            assert exchange.field.name == field_name
            assert exchange._compute_halo_depth() == depth
            assert exchange.required() == (True, False)
        for loop in (loop1, loop2):
            assert isinstance(loop, DynLoop)

    # Untransformed: speculative halo exchanges at the start of the
    # schedule for "f1" to depth 1 and for "f2" to depth 1
    check(schedule, f1depth="1", f2depth="1")

    # Compilation is only checked at this point; compilation of
    # redundant computation is checked separately
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    rc_trans = Dynamo0p3RedundantComputationTrans()

    # 1st loop iterates over cells into the level-2 halo: speculative
    # halo exchanges for "f1" to depth 1 and for "f2" to depth 2
    rc_trans.apply(schedule.children[2], depth=2)
    check(schedule, f1depth="1", f2depth="2")

    # 1st loop iterates over cells to the maximum halo depth:
    # speculative halo exchanges for "f1" to depth max halo - 1 and for
    # "f2" to max halo
    rc_trans.apply(schedule.children[2])
    check(schedule, f1depth="mesh%get_halo_depth()-1",
          f2depth="mesh%get_halo_depth()")
def test_gh_inc_nohex_2(tmpdir, monkeypatch):
    '''With COMPUTE_ANNEXED_DOFS set to False, a gh_inc access to a field
    in a kernel (iterating to the l1 halo) does need a halo exchange when
    the previous writer is known and iterates over dofs to ndofs, but
    not when that writer iterates to halo(1) or to the maximum halo
    depth.

    :param tmpdir: temporary directory passed to Dynamo0p3Build.
    :param monkeypatch: used to force ``_compute_annexed_dofs`` to False.

    '''
    # Force COMPUTE_ANNEXED_DOFS off
    api_config = Config.get().api_conf(API)
    monkeypatch.setattr(api_config, "_compute_annexed_dofs", False)

    # Parse the algorithm file and get hold of the PSy schedule
    alg_file = os.path.join(BASE_PATH, "14.12_halo_wdofs_to_inc.f90")
    _, info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=True).create(info)
    schedule = psy.invokes.invoke_list[0].schedule

    def check_without_f1_exchange(halo_depth):
        '''Check that the schedule is (loop to dof_halo, halo exchange
        of f2, loop), i.e. that there is no halo exchange for f1.

        :param halo_depth: expected halo depth of the first loop or \
                           None if the depth is expected to be unset.

        '''
        nodes = schedule.children
        first_loop, exchange, second_loop = nodes[0], nodes[1], nodes[2]
        assert len(schedule.children) == 3
        assert isinstance(first_loop, DynLoop)
        assert first_loop.upper_bound_name == "dof_halo"
        if halo_depth is None:
            assert not first_loop.upper_bound_halo_depth
        else:
            assert first_loop.upper_bound_halo_depth == halo_depth
        assert isinstance(exchange, DynHaloExchange)
        assert exchange.field.name == "f2"
        assert exchange.required() == (True, False)
        assert isinstance(second_loop, DynLoop)

    # Untransformed: the 1st loop iterates over dofs to ndofs and both
    # f1 and f2 have a halo exchange
    nodes = schedule.children
    loop1, haloex1, haloex2, loop2 = nodes[0], nodes[1], nodes[2], nodes[3]
    assert len(schedule.children) == 4
    assert isinstance(loop1, DynLoop)
    assert loop1.upper_bound_name == "ndofs"
    assert isinstance(haloex1, DynHaloExchange)
    assert haloex1.field.name == "f1"
    assert haloex1.required() == (True, True)
    assert isinstance(haloex2, DynHaloExchange)
    assert haloex2.field.name == "f2"
    assert haloex2.required() == (True, False)
    assert isinstance(loop2, DynLoop)

    # Compilation is only checked at this point; compilation of
    # redundant computation is checked separately
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    rc_trans = Dynamo0p3RedundantComputationTrans()

    # 1st loop iterates over dofs into the level-1 halo. There should be
    # no halo exchange for field "f1"
    rc_trans.apply(schedule.children[0], depth=1)
    check_without_f1_exchange(1)

    # 1st loop iterates over dofs to the maximum halo depth
    rc_trans.apply(schedule.children[0])
    check_without_f1_exchange(None)