def test_omp_region_no_slice_no_const_bounds():
    '''Check the code generated when an OpenMP PARALLEL region
    transformation is applied to a list of nodes after the Schedule has
    been transformed to use loop-bound look-ups (const_bounds=False).

    '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)

    region_trans = OMPParallelTrans()
    bounds_trans = GOConstLoopBoundsTrans()

    # Switch to loop-bound look-ups first, then enclose all children of
    # the resulting schedule in one parallel region
    lookup_sched, _ = bounds_trans.apply(invoke.schedule, const_bounds=False)
    parallel_sched, _ = region_trans.apply(lookup_sched.children)

    # The invoke must generate code from the transformed schedule
    invoke.schedule = parallel_sched

    # Lower-cased text of the generated code
    generated = str(psy.gen).lower()

    # Count the kernel calls that appear between the opening and closing
    # directives of the parallel region
    inside_region = False
    calls_in_region = 0
    for text in generated.split('\n'):
        if '!$omp parallel default' in text:
            inside_region = True
        if '!$omp end parallel' in text:
            inside_region = False
        calls_in_region += int(inside_region and ' call ' in text)

    assert calls_in_region == 3
def test_omp_region_with_single_loop():
    '''Check that the OpenMP PARALLEL region transformation accepts a
    single node of a schedule, both before and after constant loop
    bounds are turned off.

    '''
    def calls_in_parallel_region(text):
        '''Return the number of lines of `text` containing ' call '
        that lie inside an '!$omp parallel default' ...
        '!$omp end parallel' region.'''
        inside_region = False
        total = 0
        for entry in text.split('\n'):
            if '!$omp parallel default' in entry:
                inside_region = True
            if '!$omp end parallel' in entry:
                inside_region = False
            if inside_region and ' call ' in entry:
                total += 1
        return total

    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    original_sched = invoke.schedule

    region_trans = OMPParallelTrans()
    bounds_trans = GOConstLoopBoundsTrans()

    # Enclose only the second child of the schedule in a parallel region
    # and generate code from the transformed schedule
    parallel_sched, _ = region_trans.apply(original_sched.children[1])
    invoke.schedule = parallel_sched
    assert calls_in_parallel_region(str(psy.gen).lower()) == 1

    # Repeat the check after turning off constant loop bounds
    lookup_sched, _ = bounds_trans.apply(parallel_sched, const_bounds=False)
    invoke.schedule = lookup_sched
    assert calls_in_parallel_region(str(psy.gen).lower()) == 1
def test_loop_fuse_different_iterates_over():
    '''Check that a TransformationError is raised when we attempt to
    fuse two loops that have differing values of ITERATES_OVER, with
    and without constant loop bounds.

    '''
    _, invoke = get_invoke("test11_different_iterates_over_one_invoke.f90",
                           0)
    fuse_trans = LoopFuseTrans()
    bounds_trans = GOConstLoopBoundsTrans()

    def assert_fusion_rejected(sched):
        '''Fusing the first two children of `sched` must raise.'''
        with pytest.raises(TransformationError):
            _, _ = fuse_trans.apply(sched.children[0], sched.children[1])

    # The first two loops iterate over different things
    original_sched = invoke.schedule
    assert_fusion_rejected(original_sched)

    # Turning off constant loop bounds should have no effect on this
    lookup_sched, _ = bounds_trans.apply(original_sched, const_bounds=False)
    assert_fusion_rejected(lookup_sched)
def test_omp_parallel_loop():
    '''Check that an OMP PARALLEL DO is generated correctly,
    independent of whether or not constant loop bounds are being
    generated.

    '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)

    pardo_trans = GOceanOMPParallelLoopTrans()
    bounds_trans = GOConstLoopBoundsTrans()

    # Parallelise the first loop and generate code from the result
    pardo_sched, _ = pardo_trans.apply(invoke.schedule.children[0])
    invoke.schedule = pardo_sched
    generated = str(psy.gen).lower()

    with_const_bounds = (
        "!$omp parallel do default(shared), private(j,i), schedule(static)\n"
        " do j=2,jstop\n"
        " do i=2,istop+1\n"
        " call compute_cu_code(i, j, cu_fld%data, p_fld%data, u_fld%data)\n"
        " end do \n"
        " end do \n"
        " !$omp end parallel do")
    assert with_const_bounds in generated

    # Same check once the loop bounds are looked up from the field
    lookup_sched, _ = bounds_trans.apply(pardo_sched, const_bounds=False)
    invoke.schedule = lookup_sched
    generated = str(psy.gen).lower()

    with_lookup_bounds = (
        " !$omp parallel do default(shared), private(j,i), "
        "schedule(static)\n"
        " do j=cu_fld%internal%ystart,cu_fld%internal%ystop\n"
        " do i=cu_fld%internal%xstart,cu_fld%internal%xstop\n"
        " call compute_cu_code(i, j, cu_fld%data, p_fld%data, u_fld%data)\n"
        " end do \n"
        " end do \n"
        " !$omp end parallel do")
    assert with_lookup_bounds in generated
def test_driver_creation(tmpdir):
    '''Test that the driver is created correctly for all variable access
    modes (input, input-output, output).

    :param tmpdir: temporary directory supplied by the test framework; \
        the test changes into it so that the driver file is written there.

    '''
    # Use tmpdir so that the driver is created in tmp
    tmpdir.chdir()

    etrans = GOceanExtractTrans()
    ctrans = GOConstLoopBoundsTrans()
    psy, invoke = get_invoke("driver_test.f90",
                             GOCEAN_API, idx=0, dist_mem=False)
    schedule = invoke.schedule
    # This test expects constant loop bounds
    ctrans.apply(schedule)
    etrans.apply(schedule.children[0], {'create_driver': True})
    # We are only interested in the driver, so ignore results.
    str(psy.gen)

    driver = tmpdir.join("driver-psy_extract_example_with_various_variable_"
                         "access_patterns-invoke_0_compute_kernel:compute_"
                         "kernel_code:r0.f90")
    assert driver.isfile()

    with driver.open("r") as driver_file:
        driver_code = driver_file.read()

    # This is an excerpt of the code that should get created.
    # It is tested line by line since there is other code in between
    # which is not important, and the order might also change. It also
    # tests if unique variable names are created in the driver: the user
    # program contains a local variable 'dx', which clashes with the grid
    # property dx. The grid property will be renamed to 'dx_1'.
    #
    # The excerpt is held as an explicit sequence of lines rather than as
    # one text blob that has to be split: each entry is checked with a
    # substring test, so it carries no leading indentation.
    expected_lines = (
        'USE extract_psy_data_mod, ONLY: extract_PSyDataType',
        'IMPLICIT NONE',
        'REAL(KIND=8), allocatable, dimension(:,:) :: gphiu',
        'REAL(KIND=8), allocatable, dimension(:,:) :: out_fld',
        'REAL(KIND=8), allocatable, dimension(:,:) :: out_fld_post',
        'REAL(KIND=8), allocatable, dimension(:,:) :: in_fld',
        'REAL(KIND=8), allocatable, dimension(:,:) :: dx',
        'REAL(KIND=8), allocatable, dimension(:,:) :: dx_1',
        'REAL(KIND=8), allocatable, dimension(:,:) :: in_out_fld',
        'REAL(KIND=8), allocatable, dimension(:,:) :: in_out_fld_post',
        'TYPE(extract_PSyDataType) extract_psy_data',
        'CALL extract_psy_data%OpenRead('
        '"psy_extract_example_with_various_variable'
        '_access_patterns", '
        '"invoke_0_compute_kernel:compute_kernel_code:r0")',
        'CALL extract_psy_data%ReadVariable("out_fld_post", out_fld_post)',
        'ALLOCATE (out_fld, mold=out_fld_post)',
        'out_fld = 0.0',
        'CALL extract_psy_data%ReadVariable("in_fld", in_fld)',
        'CALL extract_psy_data%ReadVariable("in_out_fld_post", '
        'in_out_fld_post)',
        'CALL extract_psy_data%ReadVariable("dx", dx)',
        'CALL extract_psy_data%ReadVariable("in_fld%grid%dx", dx_1)',
        'CALL extract_psy_data%ReadVariable("in_fld%grid%gphiu", gphiu)',
        '! RegionStart',
        'DO j=2,jstop',
        'DO i=2,istop+1',
        'CALL compute_kernel_code(i, j, out_fld, in_out_fld, in_fld, '
        'dx, dx_1, gphiu)',
        'END DO',
        'END DO',
        '! RegionEnd',
        '!',
        '! Check out_fld',
        '! Check i',
        '! Check j',
        '! Check in_out_fld',
    )

    for line in expected_lines:
        assert line in driver_code
def test_node_list_ompparallel_gocean1p0():
    '''Test that applying the Extract transformation to a list of nodes
    enclosed within an OMP Parallel region produces the correct result
    in the GOcean1.0 API.

    '''
    etrans = GOceanExtractTrans()
    ltrans = GOceanOMPLoopTrans()
    otrans = OMPParallelTrans()
    ctrans = GOConstLoopBoundsTrans()

    # Test two Loops, each given an OMP loop directive, that are
    # enclosed within a single OMP parallel region
    psy, invoke = get_invoke("single_invoke_three_kernels.f90",
                             GOCEAN_API, idx=0, dist_mem=False)
    schedule = invoke.schedule

    # Apply GOConstLoopBoundsTrans
    ctrans.apply(schedule)
    # Apply GOceanOMPLoopTrans to the first two Loops
    ltrans.apply(schedule.children[0])
    ltrans.apply(schedule.children[1])
    # and enclose them within a parallel region
    otrans.apply(schedule.children[0:2])
    # Now enclose the parallel region within an ExtractNode (inserted
    # at the previous location of the OMPParallelDirective)
    etrans.apply(schedule.children[0])

    code = str(psy.gen)

    # The expected Fortran, one statement per line. '!' comments and
    # '!$omp' sentinels extend to the end of their line, so the excerpt
    # has to be matched as separate lines.
    expected_lines = (
        '! ExtractStart',
        '!',
        'CALL extract_psy_data%PreStart("psy_single_invoke_three_kernels", '
        '"invoke_0:r0", 3, 4)',
        'CALL extract_psy_data%PreDeclareVariable("p_fld", p_fld)',
        'CALL extract_psy_data%PreDeclareVariable("u_fld", u_fld)',
        'CALL extract_psy_data%PreDeclareVariable("v_fld", v_fld)',
        'CALL extract_psy_data%PreDeclareVariable("cu_fld_post", cu_fld)',
        'CALL extract_psy_data%PreDeclareVariable("cv_fld_post", cv_fld)',
        'CALL extract_psy_data%PreDeclareVariable("i_post", i)',
        'CALL extract_psy_data%PreDeclareVariable("j_post", j)',
        'CALL extract_psy_data%PreEndDeclaration',
        'CALL extract_psy_data%ProvideVariable("p_fld", p_fld)',
        'CALL extract_psy_data%ProvideVariable("u_fld", u_fld)',
        'CALL extract_psy_data%ProvideVariable("v_fld", v_fld)',
        'CALL extract_psy_data%PreEnd',
        '!$omp parallel default(shared), private(i,j)',
        '!$omp do schedule(static)',
        'DO j=2,jstop',
        'DO i=2,istop+1',
        'CALL compute_cu_code(i, j, cu_fld%data, p_fld%data, u_fld%data)',
        'END DO',
        'END DO',
        '!$omp end do',
        '!$omp do schedule(static)',
        'DO j=2,jstop+1',
        'DO i=2,istop',
        'CALL compute_cv_code(i, j, cv_fld%data, p_fld%data, v_fld%data)',
        'END DO',
        'END DO',
        '!$omp end do',
        '!$omp end parallel',
        'CALL extract_psy_data%PostStart',
        'CALL extract_psy_data%ProvideVariable("cu_fld_post", cu_fld)',
        'CALL extract_psy_data%ProvideVariable("cv_fld_post", cv_fld)',
        'CALL extract_psy_data%ProvideVariable("i_post", i)',
        'CALL extract_psy_data%ProvideVariable("j_post", j)',
        'CALL extract_psy_data%PostEnd',
        '!',
        '! ExtractEnd',
    )

    # Compare with leading/trailing blanks removed from every generated
    # line: the statements must appear contiguously and in this order,
    # but the check does not depend on the indentation of the output.
    # NOTE(review): this deliberately ignores indentation - confirm that
    # is acceptable for this test.
    generated = "\n".join(text.strip() for text in code.split("\n"))
    assert "\n".join(expected_lines) in generated
def test_single_node_ompparalleldo_gocean1p0_failing_const_loop():
    '''Test that applying the Extract transformation to a Node enclosed
    within an OMP Parallel DO Directive produces the correct result in
    the GOcean1.0 API. This test is mostly identical to the previous
    one, but uses const loop bounds. At this stage, the dependency
    analysis still reports `cv_fld%internal%xstart` etc as loop
    boundaries, but the code created will be using istop and jstop.

    '''
    etrans = GOceanExtractTrans()
    otrans = GOceanOMPParallelLoopTrans()
    ctrans = GOConstLoopBoundsTrans()

    # Test a Loop nested within the OMP Parallel DO Directive
    psy, invoke = get_invoke("single_invoke_three_kernels.f90",
                             GOCEAN_API, idx=0, dist_mem=False)
    schedule = invoke.schedule
    ctrans.apply(schedule)
    # Required for #969
    schedule.view()

    # Apply GOceanOMPParallelLoopTrans to the second Loop
    otrans.apply(schedule.children[1])
    # Now enclose the parallel region within an ExtractNode (inserted
    # at the previous location of the OMPParallelDoDirective)
    etrans.apply(schedule.children[1])

    code = str(psy.gen)

    # The expected Fortran, one statement per line. '!' comments and
    # '!$omp' sentinels extend to the end of their line, so the excerpt
    # has to be matched as separate lines.
    expected_lines = (
        '! ExtractStart',
        '!',
        'CALL extract_psy_data%PreStart("psy_single_invoke_three_kernels", '
        '"invoke_0:compute_cv_code:r0", 6, 3)',
        'CALL extract_psy_data%PreDeclareVariable("istop", istop)',
        'CALL extract_psy_data%PreDeclareVariable("jstop", jstop)',
        'CALL extract_psy_data%PreDeclareVariable("p_fld", p_fld)',
        'CALL extract_psy_data%PreDeclareVariable("v_fld", v_fld)',
        'CALL extract_psy_data%PreDeclareVariable("cv_fld_post", cv_fld)',
        'CALL extract_psy_data%PreDeclareVariable("i_post", i)',
        'CALL extract_psy_data%PreDeclareVariable("j_post", j)',
        'CALL extract_psy_data%PreEndDeclaration',
        'CALL extract_psy_data%ProvideVariable("istop", istop)',
        'CALL extract_psy_data%ProvideVariable("jstop", jstop)',
        'CALL extract_psy_data%ProvideVariable("p_fld", p_fld)',
        'CALL extract_psy_data%ProvideVariable("v_fld", v_fld)',
        'CALL extract_psy_data%PreEnd',
        '!$omp parallel do default(shared), private(i,j), schedule(static)',
        'DO j=2,jstop+1',
        'DO i=2,istop',
        'CALL compute_cv_code(i, j, cv_fld%data, p_fld%data, v_fld%data)',
        'END DO',
        'END DO',
        '!$omp end parallel do',
        'CALL extract_psy_data%PostStart',
        'CALL extract_psy_data%ProvideVariable("cv_fld_post", cv_fld)',
        'CALL extract_psy_data%ProvideVariable("i_post", i)',
        'CALL extract_psy_data%ProvideVariable("j_post", j)',
        'CALL extract_psy_data%PostEnd',
        '!',
        '! ExtractEnd',
    )

    # Compare with leading/trailing blanks removed from every generated
    # line: the statements must appear contiguously and in this order,
    # but the check does not depend on the indentation of the output.
    # NOTE(review): this deliberately ignores indentation - confirm that
    # is acceptable for this test.
    generated = "\n".join(text.strip() for text in code.split("\n"))
    assert "\n".join(expected_lines) in generated