def test_omp_transform():
    '''Tests that the profiling transform works correctly with OMP
    parallelisation.

    Builds the first loop of invoke_loop1 into an OMP DO inside an OMP
    PARALLEL region, wraps the whole region in a profiling node and then
    checks the generated Fortran.  A second profiling node is then added
    between the parallel and do directives and the output re-checked.
    '''
    # NOTE(review): a later re-definition of test_omp_transform in this
    # file shadows this one, so pytest never collects it.  Decide which
    # version (this PSyData-style one or the older ProfileRegionTrans
    # one) is intended and remove the duplicate.
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    # This test expects constant loop bounds
    schedule._const_loop_bounds = True
    prt = ProfileTrans()
    omp_loop = GOceanOMPLoopTrans()
    omp_par = OMPParallelTrans()
    # Parallelise the first loop: OMP DO, then OMP PARALLEL, then wrap
    # the whole region in a profiling node (outermost).
    omp_loop.apply(schedule[0])
    omp_par.apply(schedule[0])
    prt.apply(schedule[0])
    # NOTE(review): the expected-output literals below appear to have had
    # their leading indentation collapsed to single spaces -- verify
    # against the actual generated code before trusting these assertions.
    correct = (
        " CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", "
        "\"invoke_loop1:bc_ssh_code:r0\", 0, 0)\n"
        " !$omp parallel default(shared), private(i,j)\n"
        " !$omp do schedule(static)\n"
        " DO j=2,jstop\n"
        " DO i=2,istop\n"
        " CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)\n"
        " END DO\n"
        " END DO\n"
        " !$omp end do\n"
        " !$omp end parallel\n"
        " CALL profile_psy_data%PostEnd")
    code = str(invoke.gen())
    assert correct in code

    # Now add another profile node between the omp parallel and omp do
    # directives:
    prt.apply(schedule[0].profile_body[0].dir_body[0])

    code = str(invoke.gen())
    # NOTE(review): this literal also looks whitespace-mangled (the
    # newlines inside the triple-quoted parts appear collapsed to
    # spaces); restore before trusting the assertion.
    correct = \
        "CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", " + \
        '''"invoke_loop1:bc_ssh_code:r0", 0, 0) !$omp parallel default(shared), private(i,j) CALL profile_psy_data_1%PreStart("psy_test27_loop_swap", ''' + \
        '''"invoke_loop1:bc_ssh_code:r1", 0, 0) !$omp do schedule(static) DO j=2,jstop DO i=2,istop CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask) END DO END DO !$omp end do CALL profile_psy_data_1%PostEnd !$omp end parallel CALL profile_psy_data%PostEnd'''
    assert correct in code
def test_omp_transform():
    '''Tests that the profiling transform works correctly with OMP
    parallelisation.

    Uses the older ProfileRegionTrans API: wraps the first loop of
    invoke_loop1 in an OMP DO, an OMP PARALLEL region and a profiling
    region, checks the generated code, then inserts a second profiling
    node between the parallel and do directives and re-checks.
    '''
    # NOTE(review): this re-definition shadows an earlier function of
    # the same name in this file; only this version is collected by
    # pytest.  Reconcile the two (old ProfileRegionTrans API vs the
    # newer PSyData/ProfileTrans API) and delete the stale one.
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1")
    schedule = invoke.schedule
    prt = ProfileRegionTrans()
    omp_loop = GOceanOMPLoopTrans()
    omp_par = OMPParallelTrans()
    # Parallelise the first loop:
    sched1, _ = omp_loop.apply(schedule.children[0])
    sched2, _ = omp_par.apply(sched1.children[0])
    sched3, _ = prt.apply(sched2.children[0])
    # NOTE(review): the expected-output literals below appear to have
    # had their leading indentation collapsed to single spaces -- verify
    # against the actual generated code before trusting the assertions.
    correct = (
        " CALL ProfileStart(\"boundary_conditions_ne_offset_mod\", "
        "\"bc_ssh_code\", profile)\n"
        " !$omp parallel default(shared), private(i,j)\n"
        " !$omp do schedule(static)\n"
        " DO j=2,jstop\n"
        " DO i=2,istop\n"
        " CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)\n"
        " END DO \n"
        " END DO \n"
        " !$omp end do\n"
        " !$omp end parallel\n"
        " CALL ProfileEnd(profile)")
    code = str(invoke.gen())
    assert correct in code

    # Now add another profile node between the omp parallel and omp do
    # directives:
    sched3, _ = prt.apply(sched3.children[0].children[0].children[0])

    code = str(invoke.gen())
    # NOTE(review): this triple-quoted literal also looks mangled (its
    # internal newlines appear collapsed to spaces; the "\x20" escapes
    # mark intentional trailing blanks).  Restore before trusting it.
    correct = ''' CALL ProfileStart("boundary_conditions_ne_offset_mod", \ "bc_ssh_code", profile) !$omp parallel default(shared), private(i,j) CALL ProfileStart("boundary_conditions_ne_offset_mod", "bc_ssh_code_1", \ profile_1) !$omp do schedule(static) DO j=2,jstop DO i=2,istop CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask) END DO\x20 END DO\x20 !$omp end do CALL ProfileEnd(profile_1) !$omp end parallel CALL ProfileEnd(profile)'''
    assert correct in code
def test_transform_errors(capsys): '''Tests error handling of the profile region transformation.''' # This has been imported and tested before, so we can assume # here that this all works as expected/ _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0", name="invoke_loop1", dist_mem=False) schedule = invoke.schedule prt = ProfileTrans() # Just to be sure: also check that the right order does indeed work! sched1, _ = prt.apply( [schedule.children[0], schedule.children[1], schedule.children[2]]) sched1.view() out, _ = capsys.readouterr() # out is unicode, and has no replace function, so convert to string first out = str(out).replace("\n", "") correct_re = (".*GOInvokeSchedule.*?" r"Profile.*?" r"Loop.*\[type='outer'.*?" r"Loop.*\[type='outer'.*?" r"Loop.*\[type='outer'") assert re.search(correct_re, out) # Test that we don't add a profile node inside a OMP do loop (which # would be invalid syntax): _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0", name="invoke_loop1", dist_mem=False) schedule = invoke.schedule prt = ProfileTrans() omp_loop = GOceanOMPLoopTrans() # Parallelise the first loop: sched1, _ = omp_loop.apply(schedule[0]) # Inserting a ProfileTrans inside a omp do loop is syntactically # incorrect, the inner part must be a do loop only: with pytest.raises(TransformationError) as excinfo: prt.apply(sched1[0].dir_body[0]) assert "A PSyData node cannot be inserted between an OpenMP/ACC "\ "directive and the loop(s) to which it applies!" \ in str(excinfo.value) with pytest.raises(TransformationError) as excinfo: prt.apply(sched1[0], {"region_name": "xx"}) assert "Error in ProfileTrans. User-supplied region name must be a " \ "tuple containing two non-empty strings" in str(excinfo.value)
def test_node_list_ompparallel_gocean1p0():
    ''' Test that applying Extract Transformation on a list of Nodes
    enclosed within an OMP Parallel Region produces the correct result
    in GOcean1.0 API. '''
    # NOTE(review): a later re-definition of this test in the file
    # shadows this one (that version uses the newer PSyData-based
    # extraction API); reconcile the duplicates.
    from psyclone.transformations import GOceanOMPLoopTrans, \
        OMPParallelTrans
    etrans = GOceanExtractRegionTrans()
    ltrans = GOceanOMPLoopTrans()
    otrans = OMPParallelTrans()
    # Test a Loop nested within the OMP Parallel DO Directive
    _, invoke_info = parse(os.path.join(GOCEAN_BASE_PATH,
                                        "single_invoke_three_kernels.f90"),
                           api=GOCEAN_API)
    psy = PSyFactory(GOCEAN_API,
                     distributed_memory=False).create(invoke_info)
    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule
    # Apply GOceanOMPParallelLoopTrans to the first two Loops
    schedule, _ = ltrans.apply(schedule.children[0])
    schedule, _ = ltrans.apply(schedule.children[1])
    # and enclose them within a parallel region
    schedule, _ = otrans.apply(schedule.children[0:2])
    # Now enclose the parallel region within an ExtractNode (inserted
    # at the previous location of the OMPParallelDirective
    schedule, _ = etrans.apply(schedule.children[0])
    code = str(psy.gen)
    # NOTE(review): leading indentation inside the literal below appears
    # to have been collapsed to single spaces -- verify against psy.gen
    # before trusting the assertion.
    output = (" ! ExtractStart\n"
              " ! CALL write_extract_arguments(argument_list)\n"
              " !\n"
              " !$omp parallel default(shared), private(i,j)\n"
              " !$omp do schedule(static)\n"
              " DO j=2,jstop\n"
              " DO i=2,istop+1\n"
              " CALL compute_cu_code(i, j, cu_fld%data, "
              "p_fld%data, u_fld%data)\n"
              " END DO \n"
              " END DO \n"
              " !$omp end do\n"
              " !$omp do schedule(static)\n"
              " DO j=2,jstop+1\n"
              " DO i=2,istop\n"
              " CALL compute_cv_code(i, j, cv_fld%data, "
              "p_fld%data, v_fld%data)\n"
              " END DO \n"
              " END DO \n"
              " !$omp end do\n"
              " !$omp end parallel\n"
              " !\n"
              " ! ExtractEnd\n")
    assert output in code
def test_go_omp_loop_applied_to_non_loop():
    ''' Check that GOceanOMPLoopTrans rejects a target that is not a
    loop: once the first loop has been wrapped in an OMP DO directive, a
    second application to that directive must raise a
    TransformationError. '''
    _, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    loop_trans = GOceanOMPLoopTrans()
    # Wrap the first loop so that the schedule's first child becomes an
    # OMP Loop directive rather than a loop.
    new_sched, _ = loop_trans.apply(invoke.schedule.children[0])
    # Applying the loop transformation to that directive is an error.
    with pytest.raises(TransformationError):
        _, _ = loop_trans.apply(new_sched.children[0])
def test_go_omp_loop_applied_to_wrong_loop_type():
    ''' Check that applying a GOcean OMP DO transformation to a loop of
    an unsupported type raises a TransformationError. '''
    _, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule
    # Corrupt the loop type by poking the private attribute directly:
    # the public setter performs its own validation and would reject
    # this value, so it cannot be used to set up the error condition.
    schedule.children[0]._loop_type = "wrong"
    loop_trans = GOceanOMPLoopTrans()
    # The transformation must refuse the mistyped loop.
    with pytest.raises(TransformationError):
        _, _ = loop_trans.apply(schedule.children[0])
def test_omp_region_with_children_of_different_types():
    ''' Check that code generation succeeds for an OpenMP parallel
    region whose children are of mixed types (an OMP DO directive
    alongside plain loops). '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    loop_trans = GOceanOMPLoopTrans()
    region_trans = OMPParallelTrans()
    # Wrap only the second loop in an OMP DO directive...
    new_sched, _ = loop_trans.apply(invoke.schedule.children[1])
    # ...then enclose every child (directive plus untouched loops) in a
    # single parallel region.
    final_sched, _ = region_trans.apply(new_sched.children)
    invoke.schedule = final_sched
    # Code generation for this mixed-content region must not fail.
    _ = psy.gen
def test_omp_region_before_loops_trans():
    ''' Apply the OpenMP PARALLEL region transformation before the loop
    transformations and check that the parallel and do directives end up
    on adjacent lines of the generated code. '''
    psy, invoke = get_invoke("single_invoke_two_kernels.f90", 0)
    schedule = invoke.schedule
    # First, enclose every loop of the schedule in one OpenMP region.
    region_trans = OMPParallelTrans()
    omp_schedule, _ = region_trans.apply(schedule.children)
    # Then wrap each loop inside that region in its own OMP DO.
    loop_trans = GOceanOMPLoopTrans()
    for node in omp_schedule.children[0].children:
        schedule, _ = loop_trans.apply(node)
        omp_schedule = schedule
    # Install the transformed schedule and generate the code.
    invoke.schedule = omp_schedule
    gen = str(psy.gen)
    # Scan the output for the two directives, stopping at the first
    # loop so only the directives ahead of it are considered.
    region_line = -1
    do_line = -1
    for lineno, text in enumerate(gen.split('\n')):
        if '!$omp parallel default' in text:
            region_line = lineno
        if '!$omp do' in text:
            do_line = lineno
        if 'DO j=' in text:
            break
    # Both directives must be present...
    assert region_line != -1
    assert do_line != -1
    # ...and the OMP DO must immediately follow the parallel directive.
    assert do_line - region_line == 1
def test_omp_region_commutes_with_loop_trans_bounds_lookup():
    ''' Check that the OpenMP PARALLEL region and (orphan) loop
    transformations commute once constant loop bounds have been switched
    off: the generated code must be identical regardless of the order in
    which the two transformations are applied. '''
    # Order 1: OMP DO around each loop first, then one PARALLEL region
    # around the resulting set of directives.
    psy, invoke = get_invoke("single_invoke_two_kernels.f90", 0)
    schedule = invoke.schedule
    # Disable constant loop bounds.
    cbtrans = GOConstLoopBoundsTrans()
    newsched, _ = cbtrans.apply(schedule, const_bounds=False)
    loop_trans = GOceanOMPLoopTrans()
    for node in newsched.children:
        omp_schedule, _ = loop_trans.apply(node)
    region_trans = OMPParallelTrans()
    schedule, _ = region_trans.apply(omp_schedule.children)
    invoke.schedule = schedule
    loop_before_region_gen = str(psy.gen)

    # Order 2: PARALLEL region first, then an OMP DO around each loop
    # inside it.  The schedule is re-created from scratch rather than
    # deep-copied earlier, as copying can cause resource problems.
    psy, invoke = get_invoke("single_invoke_two_kernels.f90", 0)
    schedule = invoke.schedule
    # Disable constant loop bounds.
    cbtrans = GOConstLoopBoundsTrans()
    schedule, _ = cbtrans.apply(schedule, const_bounds=False)
    region_trans = OMPParallelTrans()
    omp_schedule, _ = region_trans.apply(schedule.children)
    loop_trans = GOceanOMPLoopTrans()
    for node in omp_schedule.children[0].children:
        schedule, _ = loop_trans.apply(node)
        omp_schedule = schedule
    invoke.schedule = omp_schedule
    region_before_loop_gen = str(psy.gen)
    # Both application orders must yield byte-identical code.
    assert region_before_loop_gen == loop_before_region_gen
def test_omp_do_schedule_guided():
    ''' Check that an OMP DO directive can be given the "guided"
    schedule and that it appears in the generated code. '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    loop_trans = GOceanOMPLoopTrans(omp_schedule="guided")
    region_trans = OMPParallelTrans()
    # Wrap one loop in an OMP DO with the requested schedule...
    omp_schedule, _ = loop_trans.apply(invoke.schedule.children[1])
    # ...and enclose that directive within a parallel region.
    new_sched, _ = region_trans.apply(omp_schedule.children[1])
    invoke.schedule = new_sched
    # The requested schedule clause must survive into the output.
    gen = str(psy.gen)
    assert '!$omp do schedule(guided)' in gen
def test_omp_schedule_default_static():
    ''' Check that an OMP DO directive defaults to the "static" schedule
    when none is specified explicitly. '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    # Construct the loop transformation without any omp_schedule
    # argument so the default applies.
    loop_trans = GOceanOMPLoopTrans()
    region_trans = OMPParallelTrans()
    omp_schedule, _ = loop_trans.apply(invoke.schedule.children[1])
    # Enclose the directive within a parallel region.
    new_sched, _ = region_trans.apply(omp_schedule.children[1])
    invoke.schedule = new_sched
    # The generated code must carry the static schedule clause.
    gen = str(psy.gen)
    assert '!$omp do schedule(static)' in gen
def test_omp_region_retains_kernel_order3():
    ''' Check that enclosing the middle one of three kernels in an
    OpenMP PARALLEL region leaves the kernel ordering unchanged. '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    region_trans = OMPParallelTrans()
    loop_trans = GOceanOMPLoopTrans()
    # OMP DO around the 2nd loop of the schedule...
    omp_schedule, _ = loop_trans.apply(invoke.schedule.children[1])
    # ...then an OMP PARALLEL around that single directive.
    new_sched, _ = region_trans.apply([omp_schedule.children[1]])
    invoke.schedule = new_sched
    gen = str(psy.gen).lower()
    # Record the line on which each kernel call appears.
    positions = {"cu": -1, "cv": -1, "ts": -1}
    for lineno, text in enumerate(gen.split('\n')):
        if 'call compute_cu' in text:
            positions["cu"] = lineno
        if 'call compute_cv' in text:
            positions["cv"] = lineno
        if 'call time_smooth' in text:
            positions["ts"] = lineno
    # Kernels should be in order {compute_cu, compute_cv, time_smooth}
    assert positions["cu"] < positions["cv"] < positions["ts"]
def test_omp_loop_outside_region():
    ''' Check that a GenerationError is raised for an orphaned OpenMP
    loop directive that is not enclosed within a parallel region. '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule
    loop_trans = GOceanOMPLoopTrans()
    region_trans = OMPParallelTrans()
    # Put an OMP DO directive around every loop in the schedule.
    for node in schedule.children:
        omp_schedule, _ = loop_trans.apply(node)
    # Enclose only a leading subset of the children in a parallel
    # region, leaving orphaned OMP DO directive(s) outside it.
    ompr_schedule, _ = region_trans.apply(omp_schedule.children[0:-2])
    invoke.schedule = ompr_schedule
    # Generating code for the orphaned directive must fail.
    with pytest.raises(GenerationError):
        _ = psy.gen
def test_omp_region_commutes_with_loop_trans():
    ''' Check that the OpenMP PARALLEL region and (orphan) loop
    transformations commute: the generated code must be identical
    whichever transformation is applied first. '''
    # Order 1: OMP DO around each loop first, then one PARALLEL region
    # around the resulting set of directives.
    psy, invoke = get_invoke("single_invoke_two_kernels.f90", 0)
    schedule = invoke.schedule
    loop_trans = GOceanOMPLoopTrans()
    for node in schedule.children:
        omp_schedule, _ = loop_trans.apply(node)
    region_trans = OMPParallelTrans()
    schedule, _ = region_trans.apply(omp_schedule.children)
    invoke.schedule = schedule
    loop_before_region_gen = str(psy.gen)

    # Order 2: PARALLEL region first, then an OMP DO around each loop
    # inside it (schedule re-created from scratch).
    psy, invoke = get_invoke("single_invoke_two_kernels.f90", 0)
    schedule = invoke.schedule
    region_trans = OMPParallelTrans()
    omp_schedule, _ = region_trans.apply(schedule.children)
    loop_trans = GOceanOMPLoopTrans()
    for node in omp_schedule.children[0].children:
        schedule, _ = loop_trans.apply(node)
        omp_schedule = schedule
    invoke.schedule = omp_schedule
    region_before_loop_gen = str(psy.gen)
    # Both application orders must yield byte-identical code.
    assert region_before_loop_gen == loop_before_region_gen
def test_transform_errors(capsys):
    '''Tests error handling of the profile region transformation.

    Exercises the validation of ProfileRegionTrans: nodes from different
    parents, a non-Node argument, non-consecutive node lists (2- and
    3-element), a correct application (checked via the schedule view),
    and insertion directly inside an OMP DO directive.
    '''
    # This has been imported and tested before, so we can assume
    # here that this all works as expected/
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1")
    schedule = invoke.schedule
    prt = ProfileRegionTrans()

    # Nodes with different parents must be rejected.
    # NOTE: assertions below use str(excinfo.value) -- the exception
    # message itself -- rather than str(excinfo), as recommended by the
    # pytest documentation (str() of an ExceptionInfo includes
    # location/repr details and is deprecated for message checks).
    with pytest.raises(TransformationError) as excinfo:
        prt.apply([schedule.children[0].children[0],
                   schedule.children[1]])
    assert "supplied nodes are not children of the same parent." \
        in str(excinfo.value)

    # Supply not a node object:
    with pytest.raises(TransformationError) as excinfo:
        prt.apply(5)
    assert "Argument must be a single Node in a schedule or a list of Nodes " \
        "in a schedule but have been passed an object of type: " \
        in str(excinfo.value)
    # Python 3 reports 'class', python 2 'type' - so just check for both
    assert "<type 'int'>" in str(excinfo.value) or \
        "<class 'int'>" in str(excinfo.value)

    # Test that it will only allow correctly ordered nodes:
    with pytest.raises(TransformationError) as excinfo:
        sched1, _ = prt.apply([schedule.children[1],
                               schedule.children[0]])
    assert "Children are not consecutive children of one parent:" \
        in str(excinfo.value)

    with pytest.raises(TransformationError) as excinfo:
        sched1, _ = prt.apply([schedule.children[0],
                               schedule.children[2]])
    assert "Children are not consecutive children of one parent:" \
        in str(excinfo.value)

    # Test 3 element lists: first various incorrect ordering:
    with pytest.raises(TransformationError) as excinfo:
        sched1, _ = prt.apply([schedule.children[0],
                               schedule.children[2],
                               schedule.children[1]])
    assert "Children are not consecutive children of one parent:" \
        in str(excinfo.value)

    with pytest.raises(TransformationError) as excinfo:
        sched1, _ = prt.apply([schedule.children[1],
                               schedule.children[0],
                               schedule.children[2]])
    assert "Children are not consecutive children of one parent:" \
        in str(excinfo.value)

    # Just to be sure: also check that the right order does indeed work!
    sched1, _ = prt.apply([schedule.children[0],
                           schedule.children[1],
                           schedule.children[2]])
    sched1.view()
    out, _ = capsys.readouterr()
    # out is unicode, and has no replace function, so convert to string first
    out = str(out).replace("\n", "")
    # The view must show one Profile node with the three loops beneath.
    correct_re = (".*GOInvokeSchedule.*"
                  r" .*Profile.*"
                  r" .*Loop.*\[type='outer'.*"
                  r" .*Loop.*\[type='outer'.*"
                  r" .*Loop.*\[type='outer'.*")
    assert re.search(correct_re, out)

    # Test that we don't add a profile node inside a OMP do loop (which
    # would be invalid syntax):
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1")
    schedule = invoke.schedule
    prt = ProfileRegionTrans()
    omp_loop = GOceanOMPLoopTrans()
    # Parallelise the first loop:
    sched1, _ = omp_loop.apply(schedule.children[0])
    # Inserting a ProfileRegion inside a omp do loop is syntactically
    # incorrect, the inner part must be a do loop only:
    with pytest.raises(TransformationError) as excinfo:
        prt.apply(sched1.children[0].children[0])
    assert "A ProfileNode cannot be inserted between an OpenMP/ACC directive "\
        "and the loop(s) to which it applies!" in str(excinfo.value)
def test_omp_schedule_guided_with_empty_chunk():
    ''' Check that constructing a GOcean OMP loop transformation with a
    "guided" schedule whose chunk size is missing raises a
    TransformationError. '''
    # The trailing comma promises a chunk size that is never supplied.
    with pytest.raises(TransformationError):
        GOceanOMPLoopTrans(omp_schedule="guided, ")
def test_omp_invalid_schedule():
    ''' Check that an unrecognised OMP schedule string is rejected at
    construction time with a TransformationError. '''
    with pytest.raises(TransformationError):
        GOceanOMPLoopTrans(omp_schedule="rubbish")
def test_omp_schedule_auto_with_chunk():
    ''' Check that supplying a chunk size together with the "auto" OMP
    schedule raises a TransformationError. '''
    # "auto" takes no chunk size, so ",4" makes the spec invalid.
    with pytest.raises(TransformationError):
        GOceanOMPLoopTrans(omp_schedule="auto,4")
def test_node_list_ompparallel_gocean1p0():
    ''' Test that applying Extract Transformation on a list of Nodes
    enclosed within an OMP Parallel Region produces the correct result
    in GOcean1.0 API. '''
    # NOTE(review): this re-definition shadows an earlier function of
    # the same name in this file; only this (PSyData-based) version is
    # collected by pytest.  Delete the stale duplicate once confirmed.
    etrans = GOceanExtractTrans()
    ltrans = GOceanOMPLoopTrans()
    otrans = OMPParallelTrans()
    ctrans = GOConstLoopBoundsTrans()
    # Test a Loop nested within the OMP Parallel DO Directive
    psy, invoke = get_invoke("single_invoke_three_kernels.f90",
                             GOCEAN_API, idx=0, dist_mem=False)
    schedule = invoke.schedule
    # Apply GOConstLoopBoundsTrans
    ctrans.apply(schedule)
    # Apply GOceanOMPParallelLoopTrans to the first two Loops
    ltrans.apply(schedule.children[0])
    ltrans.apply(schedule.children[1])
    # and enclose them within a parallel region
    otrans.apply(schedule.children[0:2])
    # Now enclose the parallel region within an ExtractNode (inserted
    # at the previous location of the OMPParallelDirective
    etrans.apply(schedule.children[0])
    code = str(psy.gen)
    # NOTE(review): the expected-output literal below appears to have
    # had its internal newlines/indentation collapsed to single spaces
    # -- restore it against the actual generated code before trusting
    # this assertion.
    output = """ ! ExtractStart ! CALL extract_psy_data%PreStart("psy_single_invoke_three_kernels", """ \
        """"invoke_0:r0", 3, 4) CALL extract_psy_data%PreDeclareVariable("p_fld", p_fld) CALL extract_psy_data%PreDeclareVariable("u_fld", u_fld) CALL extract_psy_data%PreDeclareVariable("v_fld", v_fld) CALL extract_psy_data%PreDeclareVariable("cu_fld_post", cu_fld) CALL extract_psy_data%PreDeclareVariable("cv_fld_post", cv_fld) CALL extract_psy_data%PreDeclareVariable("i_post", i) CALL extract_psy_data%PreDeclareVariable("j_post", j) CALL extract_psy_data%PreEndDeclaration CALL extract_psy_data%ProvideVariable("p_fld", p_fld) CALL extract_psy_data%ProvideVariable("u_fld", u_fld) CALL extract_psy_data%ProvideVariable("v_fld", v_fld) CALL extract_psy_data%PreEnd !$omp parallel default(shared), private(i,j) !$omp do schedule(static) DO j=2,jstop DO i=2,istop+1 CALL compute_cu_code(i, j, cu_fld%data, p_fld%data, u_fld%data) END DO END DO !$omp end do !$omp do schedule(static) DO j=2,jstop+1 DO i=2,istop CALL compute_cv_code(i, j, cv_fld%data, p_fld%data, v_fld%data) END DO END DO !$omp end do !$omp end parallel CALL extract_psy_data%PostStart CALL extract_psy_data%ProvideVariable("cu_fld_post", cu_fld) CALL extract_psy_data%ProvideVariable("cv_fld_post", cv_fld) CALL extract_psy_data%ProvideVariable("i_post", i) CALL extract_psy_data%ProvideVariable("j_post", j) CALL extract_psy_data%PostEnd ! ! ExtractEnd"""
    assert output in code