def trans(psy):
    ''' PSyclone transformation script for the dynamo0p3 API to apply
    loop fusion and OpenMP for a particular example.'''
    otrans = OMPParallelTrans()
    ltrans = Dynamo0p3OMPLoopTrans()
    ftrans = DynamoLoopFuseTrans()
    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule

    # Loop fuse the two built-in kernels
    schedule, _ = ftrans.apply(schedule.children[0], schedule.children[1],
                               same_space=True)

    # Add an OpenMP do directive to the resultant loop-fused loop,
    # specifying that we want reproducible reductions
    schedule, _ = ltrans.apply(schedule.children[0], reprod=True)

    # Add an OpenMP parallel directive around the OpenMP do directive
    schedule, _ = otrans.apply(schedule.children[0])

    # Take a look at what we've done
    schedule.view()
    schedule.dag()
    return psy

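# Note: a transformation script such as trans() above is not normally called
# directly; it is handed to the PSyclone command-line tool, which imports the
# script, calls trans(psy) on the PSy object it has built and then writes out
# the transformed PSy-layer code. A minimal sketch of the invocation (the
# script and algorithm file names here are illustrative assumptions):
#
#     psyclone -api dynamo0.3 -s ./fuse_omp_script.py my_algorithm.f90
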
def test_gocean_omp_parallel():
    '''Test that an OMP PARALLEL directive in a 'classical' API (gocean
    here) is created correctly. '''
    from psyclone.transformations import OMPParallelTrans

    _, invoke = get_invoke("single_invoke.f90", "gocean1.0", idx=0)

    omp = OMPParallelTrans()
    omp_sched, _ = omp.apply(invoke.schedule[0])

    # Now remove the GOKern (since it's not yet supported in the
    # visitor pattern) and replace it with a simple assignment.
    # TODO: #440 tracks this.
    replace_child_with_assignment(omp_sched[0])

    # omp_sched is a GOInvokeSchedule, which is not yet supported,
    # so only convert starting from the OMPParallelDirective.
    fvisitor = FortranWriter()
    result = fvisitor(omp_sched[0])
    correct = '''!$omp parallel
a=b
!$omp end parallel'''
    assert correct in result

    cvisitor = CWriter()
    result = cvisitor(omp_sched[0])
    correct = '''#pragma omp parallel
{
  a = b;
}'''
    assert correct in result

def test_omp_region_no_slice_no_const_bounds():
    ''' Test that we generate the correct code when we apply an OpenMP
    PARALLEL region transformation to a list of nodes when the Schedule
    has been transformed to use loop-bound look-ups '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompr = OMPParallelTrans()
    cbtrans = GOConstLoopBoundsTrans()
    newsched, _ = cbtrans.apply(schedule, const_bounds=False)
    omp_schedule, _ = ompr.apply(newsched.children)

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule

    # Store the results of applying this code transformation as a string
    gen = str(psy.gen)
    gen = gen.lower()

    # Iterate over the lines of generated code
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1

    assert call_count == 3

def test_omp_region_retains_kernel_order2():
    ''' Test that applying the OpenMP PARALLEL region transformation to a
    sub-set of nodes (first 2 of 3) does not change their ordering '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompr = OMPParallelTrans()
    omp_schedule, _ = ompr.apply(schedule.children[0:2])

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule

    # Store the results of applying this code transformation as a string
    gen = str(psy.gen)
    gen = gen.lower()

    # Iterate over the lines of generated code
    cu_idx = -1
    cv_idx = -1
    ts_idx = -1
    for idx, line in enumerate(gen.split('\n')):
        if 'call compute_cu' in line:
            cu_idx = idx
        if 'call compute_cv' in line:
            cv_idx = idx
        if 'call time_smooth' in line:
            ts_idx = idx

    # Kernels should be in order {compute_cu, compute_cv, time_smooth}
    assert cu_idx < cv_idx < ts_idx

def test_openmp_region():
    ''' Test the application of an OpenMP parallel region transformation
    to a single loop '''
    psy, invoke = get_invoke("algorithm/1_single_function.f90", TEST_API,
                             name="invoke_0_testkern_type")
    schedule = invoke.schedule
    rtrans = OMPParallelTrans()
    rtrans.apply(schedule.children[0])
    gen = str(psy.gen)

    # Check that our list of private variables is correct
    assert "!$omp parallel default(shared), private(cell,map)" in gen

    for idx, line in enumerate(gen.split('\n')):
        if "!$omp parallel default(shared)" in line:
            startpara_idx = idx
        if "DO cell=1,f1%get_ncell()" in line:
            do_idx = idx
        if "CALL f1%vspace%get_cell_dofmap(cell, map)" in line:
            dmap_idx = idx
        if "CALL testkern_code(nlayers, ndf, map, f1%data, "\
           "f2%data, m1%data)" in line:
            kcall_idx = idx
        if "END DO" in line:
            enddo_idx = idx
        if "!$omp end parallel" in line:
            endpara_idx = idx
    assert do_idx == startpara_idx + 1
    assert dmap_idx == do_idx + 1
    assert kcall_idx == dmap_idx + 1
    assert enddo_idx == kcall_idx + 1
    assert endpara_idx == enddo_idx + 1

def test_omp_region_no_slice():
    ''' Test that we can pass the OpenMP PARALLEL region transformation
    a list of nodes specified as node.children '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompr = OMPParallelTrans()
    omp_schedule, _ = ompr.apply(schedule.children)

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule

    # Store the results of applying this code transformation as a string
    gen = str(psy.gen)
    gen = gen.lower()

    # Iterate over the lines of generated code
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1

    assert call_count == 3

def trans(psy):
    ''' Applies PSyclone colouring and OpenMP transformations. '''
    ctrans = Dynamo0p3ColourTrans()
    otrans = Dynamo0p3OMPLoopTrans()
    oregtrans = OMPParallelTrans()

    # Loop over all of the Invokes in the PSy object
    for invoke in psy.invokes.invoke_list:
        print("Transforming invoke '{0}' ...".format(invoke.name))
        schedule = invoke.schedule

        # Colour loops over cells unless they are on discontinuous
        # spaces (W3, WTHETA and W2V) or over dofs
        for loop in schedule.loops():
            if loop.iteration_space == "cells" \
                    and loop.field_space.orig_name \
                    not in DISCONTINUOUS_FUNCTION_SPACES:
                schedule, _ = ctrans.apply(loop)

        # Add OpenMP to loops over colours.
        for loop in schedule.loops():
            if loop.loop_type != "colours":
                schedule, _ = oregtrans.apply(loop)
                schedule, _ = otrans.apply(loop, reprod=True)

        schedule.view()

    return psy

def test_omp_transform():
    '''Tests that the profiling transform works correctly with OMP
    parallelisation.'''
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    # This test expects constant loop bounds
    schedule._const_loop_bounds = True

    prt = ProfileTrans()
    omp_loop = GOceanOMPLoopTrans()
    omp_par = OMPParallelTrans()

    # Parallelise the first loop:
    omp_loop.apply(schedule[0])
    omp_par.apply(schedule[0])
    prt.apply(schedule[0])

    correct = (
        "      CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", "
        "\"invoke_loop1:bc_ssh_code:r0\", 0, 0)\n"
        "      !$omp parallel default(shared), private(i,j)\n"
        "      !$omp do schedule(static)\n"
        "      DO j=2,jstop\n"
        "        DO i=2,istop\n"
        "          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)\n"
        "        END DO\n"
        "      END DO\n"
        "      !$omp end do\n"
        "      !$omp end parallel\n"
        "      CALL profile_psy_data%PostEnd")
    code = str(invoke.gen())
    assert correct in code

    # Now add another profile node between the omp parallel and omp do
    # directives:
    prt.apply(schedule[0].profile_body[0].dir_body[0])

    code = str(invoke.gen())

    correct = \
        "CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", " + \
        '''"invoke_loop1:bc_ssh_code:r0", 0, 0)
      !$omp parallel default(shared), private(i,j)
      CALL profile_psy_data_1%PreStart("psy_test27_loop_swap", ''' + \
        '''"invoke_loop1:bc_ssh_code:r1", 0, 0)
      !$omp do schedule(static)
      DO j=2,jstop
        DO i=2,istop
          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)
        END DO
      END DO
      !$omp end do
      CALL profile_psy_data_1%PostEnd
      !$omp end parallel
      CALL profile_psy_data%PostEnd'''
    assert correct in code

def test_omp_transform():
    '''Tests that the profiling transform works correctly with OMP
    parallelisation.'''
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1")
    schedule = invoke.schedule

    prt = ProfileRegionTrans()
    omp_loop = GOceanOMPLoopTrans()
    omp_par = OMPParallelTrans()

    # Parallelise the first loop:
    sched1, _ = omp_loop.apply(schedule.children[0])
    sched2, _ = omp_par.apply(sched1.children[0])
    sched3, _ = prt.apply(sched2.children[0])

    correct = (
        "      CALL ProfileStart(\"boundary_conditions_ne_offset_mod\", "
        "\"bc_ssh_code\", profile)\n"
        "      !$omp parallel default(shared), private(i,j)\n"
        "      !$omp do schedule(static)\n"
        "      DO j=2,jstop\n"
        "        DO i=2,istop\n"
        "          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)\n"
        "        END DO \n"
        "      END DO \n"
        "      !$omp end do\n"
        "      !$omp end parallel\n"
        "      CALL ProfileEnd(profile)")
    code = str(invoke.gen())
    assert correct in code

    # Now add another profile node between the omp parallel and omp do
    # directives:
    sched3, _ = prt.apply(sched3.children[0].children[0].children[0])

    code = str(invoke.gen())

    correct = '''      CALL ProfileStart("boundary_conditions_ne_offset_mod", \
"bc_ssh_code", profile)
      !$omp parallel default(shared), private(i,j)
      CALL ProfileStart("boundary_conditions_ne_offset_mod", "bc_ssh_code_1", \
profile_1)
      !$omp do schedule(static)
      DO j=2,jstop
        DO i=2,istop
          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)
        END DO\x20
      END DO\x20
      !$omp end do
      CALL ProfileEnd(profile_1)
      !$omp end parallel
      CALL ProfileEnd(profile)'''
    assert correct in code

def test_omp_region_with_wrong_arg_type():
    ''' Test that the OpenMP PARALLEL region transformation raises an
    appropriate error if passed something that is not a list of Nodes
    or a single Node. '''
    _, invoke = get_invoke("single_invoke_three_kernels.f90", 0)

    ompr = OMPParallelTrans()

    with pytest.raises(TransformationError):
        _, _ = ompr.apply(invoke)

def test_omp_add_region_invalid_data_move():
    ''' Check that _add_region() raises the expected error if an invalid
    value for data_movement is supplied. '''
    otrans = OMPParallelTrans()
    _, invoke_info = get_invoke("explicit_do.f90", api=API, idx=0)
    schedule = invoke_info.schedule
    otrans.apply([schedule[0]])
    ompdir = schedule[0]
    with pytest.raises(InternalError) as err:
        ompdir._add_region("DATA", "END DATA", data_movement="analyse")
    assert ("the data_movement='analyse' option is only valid for an "
            "OpenACC directive" in str(err.value))

def test_node_list_ompparallel_gocean1p0():
    ''' Test that applying Extract Transformation on a list of Nodes
    enclosed within an OMP Parallel Region produces the correct result
    in the GOcean1.0 API. '''
    from psyclone.transformations import GOceanOMPLoopTrans, OMPParallelTrans

    etrans = GOceanExtractRegionTrans()
    ltrans = GOceanOMPLoopTrans()
    otrans = OMPParallelTrans()

    # Test a list of Loops enclosed within an OMP Parallel Region
    _, invoke_info = parse(os.path.join(GOCEAN_BASE_PATH,
                                        "single_invoke_three_kernels.f90"),
                           api=GOCEAN_API)
    psy = PSyFactory(GOCEAN_API, distributed_memory=False).create(invoke_info)
    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule

    # Apply GOceanOMPLoopTrans to the first two Loops
    schedule, _ = ltrans.apply(schedule.children[0])
    schedule, _ = ltrans.apply(schedule.children[1])
    # and enclose them within a parallel region
    schedule, _ = otrans.apply(schedule.children[0:2])
    # Now enclose the parallel region within an ExtractNode (inserted
    # at the previous location of the OMPParallelDirective)
    schedule, _ = etrans.apply(schedule.children[0])

    code = str(psy.gen)
    output = ("      ! ExtractStart\n"
              "      ! CALL write_extract_arguments(argument_list)\n"
              "      !\n"
              "      !$omp parallel default(shared), private(i,j)\n"
              "      !$omp do schedule(static)\n"
              "      DO j=2,jstop\n"
              "        DO i=2,istop+1\n"
              "          CALL compute_cu_code(i, j, cu_fld%data, "
              "p_fld%data, u_fld%data)\n"
              "        END DO \n"
              "      END DO \n"
              "      !$omp end do\n"
              "      !$omp do schedule(static)\n"
              "      DO j=2,jstop+1\n"
              "        DO i=2,istop\n"
              "          CALL compute_cv_code(i, j, cv_fld%data, "
              "p_fld%data, v_fld%data)\n"
              "        END DO \n"
              "      END DO \n"
              "      !$omp end do\n"
              "      !$omp end parallel\n"
              "      !\n"
              "      ! ExtractEnd\n")
    assert output in code

def test_nemo_omp_parallel():
    '''Tests if an OpenMP parallel directive in NEMO is handled correctly.
    '''
    # Generate fparser2 parse tree from Fortran code.
    code = '''
        module test
        contains
        subroutine tmp()
          integer :: i, a
          integer, dimension(:) :: b
          do i = 1, 20, 2
            a = 2 * i
            b(i) = b(i) + a
          enddo
        end subroutine tmp
        end module test'''
    schedule = create_schedule(code, "tmp")

    from psyclone.transformations import OMPParallelTrans
    # Now apply a parallel transform
    omp_par = OMPParallelTrans()
    # Note that the loop is not handled as a NEMO kernel, so the
    # OMP node-type-check will find the assignment statement and
    # prevent application of omp parallel to the loop. So disable
    # the node type check so that omp parallel is applied.
    omp_par.apply(schedule[0], {"node-type-check": False})

    fvisitor = FortranWriter()
    result = fvisitor(schedule)
    correct = '''!$omp parallel private(a,i)
do i = 1, 20, 2
  a=2 * i
  b(i)=b(i) + a
enddo
!$omp end parallel'''
    assert correct in result

    cvisitor = CWriter()
    result = cvisitor(schedule[0])
    correct = '''#pragma omp parallel private(a,i)
{
  for(i=1; i<=20; i+=2)
  {
    a = (2 * i);
    b[i] = (b[i] + a);
  }
}'''
    assert correct in result

def test_parallelregion_refuse_codeblock():
    ''' Check that ParallelRegionTrans.validate() rejects a loop nest that
    encloses a CodeBlock. We use OMPParallelTrans as ParallelRegionTrans
    is abstract. '''
    otrans = OMPParallelTrans()
    # Construct a valid Loop in the PSyIR with a CodeBlock in its body
    parent = Loop.create(DataSymbol("ji", INTEGER_TYPE),
                         Literal("1", INTEGER_TYPE),
                         Literal("10", INTEGER_TYPE),
                         Literal("1", INTEGER_TYPE),
                         [CodeBlock([], CodeBlock.Structure.STATEMENT,
                                    None)])
    with pytest.raises(TransformationError) as err:
        otrans.validate([parent])
    assert ("Nodes of type 'CodeBlock' cannot be enclosed by a "
            "OMPParallelTrans transformation" in str(err.value))

def test_omp_region_nodes_not_children_of_same_schedule():
    ''' Test that we raise an appropriate error if the user attempts to
    put a region around nodes that are not children of the same schedule '''
    _, invoke1 = get_invoke("test12_two_invokes_two_kernels.f90", 0)
    schedule1 = invoke1.schedule
    _, invoke2 = get_invoke("test12_two_invokes_two_kernels.f90", 1)
    schedule2 = invoke2.schedule

    ompr = OMPParallelTrans()

    # Attempt to put an OpenMP parallel region around loops from the
    # two different schedules
    with pytest.raises(TransformationError):
        _, _ = ompr.apply([schedule1.children[0],
                           schedule2.children[0]])

def test_omp_parallel_region_inside_parallel_do():
    ''' Test that a generation error is raised if we attempt to have an
    OpenMP parallel region within an OpenMP parallel do (with the latter
    applied first) '''
    _, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompl = GOceanOMPParallelLoopTrans()
    ompr = OMPParallelTrans()

    # Put an OpenMP parallel do directive around one of the loops
    _, _ = ompl.apply(schedule.children[1])

    # Now attempt to put a parallel region inside that parallel do
    with pytest.raises(TransformationError):
        _, _ = ompr.apply([schedule.children[1].children[0]])

def test_omp_region_with_single_loop():
    ''' Test that we can pass the OpenMP PARALLEL region transformation
    a single node in a schedule '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompr = OMPParallelTrans()
    cbtrans = GOConstLoopBoundsTrans()

    omp_schedule, _ = ompr.apply(schedule.children[1])

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule

    # Store the results of applying this code transformation as a string
    gen = str(psy.gen)
    gen = gen.lower()

    # Iterate over the lines of generated code
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1

    assert call_count == 1

    # Repeat the test after turning off constant loop bounds
    newsched, _ = cbtrans.apply(omp_schedule, const_bounds=False)
    invoke.schedule = newsched
    gen = str(psy.gen)
    gen = gen.lower()
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1

    assert call_count == 1

def test_omp_do_update():
    '''Check the OMPDoDirective update function.'''
    from psyclone.transformations import OMPLoopTrans, OMPParallelTrans
    from psyclone.psyGen import OMPDoDirective
    _, invoke_info = parse(os.path.join(BASE_PATH, "imperfect_nest.f90"),
                           api=API, line_length=False)
    psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
    schedule = psy.invokes.get('imperfect_nest').schedule
    par_trans = OMPParallelTrans()
    loop_trans = OMPLoopTrans()
    new_sched, _ = par_trans.apply(
        schedule[0].loop_body[1].else_body[0].else_body[0])
    new_sched, _ = loop_trans.apply(
        new_sched[0].loop_body[1].else_body[0].else_body[0].children[0])
    gen_code = str(psy.gen).lower()

    correct = '''!$omp parallel default(shared), private(ji,jj)
!$omp do schedule(static)
do jj = 1, jpj, 1
  do ji = 1, jpi, 1
    zdkt(ji, jj) = (ptb(ji, jj, jk - 1, jn) - ptb(ji, jj, jk, jn)) * \
wmask(ji, jj, jk)
  end do
end do
!$omp end do
!$omp end parallel'''
    assert correct in gen_code

    directive = new_sched[0].loop_body[1].else_body[0].else_body[0]\
        .children[0]
    assert isinstance(directive, OMPDoDirective)

    # Call update a second time and make sure that this does not
    # trigger the whole update process again, and we get the same ast
    old_ast = directive.ast
    directive.update()
    assert directive.ast is old_ast

    # Remove the existing AST, so we can do more tests:
    directive.ast = None
    # Make the schedule invalid by adding a second child to the
    # OMPDoDirective
    directive.children.append(new_sched[0].loop_body[3])

    with pytest.raises(GenerationError) as err:
        _ = directive.update()
    assert ("An OpenMP DO can only be applied to a single loop but "
            "this Node has 2 children:" in str(err))

def test_omp_parallel():
    ''' Check insertion of an OpenMP parallel region containing a single,
    explicit loop. '''
    otrans = OMPParallelTrans()
    psy, invoke_info = get_invoke("explicit_do.f90", api=API, idx=0)
    schedule = invoke_info.schedule
    otrans.apply([schedule[0]])
    gen_code = str(psy.gen).lower()
    assert ("  !$omp parallel default(shared), private(ji,jj,jk)\n"
            "  do jk = 1, jpk\n"
            "    do jj = 1, jpj\n"
            "      do ji = 1, jpi\n"
            "        umask(ji, jj, jk) = ji * jj * jk / r\n"
            "      end do\n"
            "    end do\n"
            "  end do\n"
            "  !$omp end parallel\n" in gen_code)

def test_nemo_omp_parallel():
    '''Tests if an OpenMP parallel directive in NEMO is handled correctly.
    '''
    # Generate fparser2 parse tree from Fortran code.
    code = '''
        module test
        contains
        subroutine tmp()
          integer :: i, a
          integer, dimension(:) :: b
          do i = 1, 20, 2
            a = 2 * i
            b(i) = b(i) + a
          enddo
        end subroutine tmp
        end module test'''
    schedule = create_schedule(code, "tmp")

    from psyclone.transformations import OMPParallelTrans
    # Now apply a parallel transform
    omp_par = OMPParallelTrans()
    omp_par.apply(schedule[0])

    fvisitor = FortranWriter()
    result = fvisitor(schedule)
    correct = '''!$omp parallel private(a,i)
do i = 1, 20, 2
  a=2 * i
  b(i)=b(i) + a
enddo
!$omp end parallel'''
    assert correct in result

    cvisitor = CWriter()
    result = cvisitor(schedule[0])
    correct = '''#pragma omp parallel private(a,i)
{
  for(i=1; i<=20; i+=2)
  {
    a = (2 * i);
    b[i] = (b[i] + a);
  }
}'''
    assert correct in result

def test_omp_parallel_errs():
    ''' Check that we raise the expected errors when incorrectly attempting
    to add an OpenMP parallel region containing more than one node. '''
    otrans = OMPParallelTrans()
    psy, invoke_info = get_invoke("imperfect_nest.f90", api=API, idx=0)
    schedule = invoke_info.schedule
    # Apply the OMP Parallel transformation so as to enclose the last two
    # loop nests (Python's slice notation is such that the expression below
    # gives elements 2-3).
    otrans.apply(schedule[0].loop_body[2:4])
    directive = schedule[0].loop_body[2]
    # Break the AST by deleting some of it
    schedule[0].ast.content.remove(directive.children[0].ast)
    with pytest.raises(InternalError) as err:
        _ = psy.gen
    assert ("Failed to find locations to insert begin/end directives"
            in str(err.value))

def test_omp_region_nodes_not_children_of_same_parent():
    ''' Test that we raise an appropriate error if the user attempts to
    put a region around nodes that are not children of the same parent '''
    _, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompl = GOceanOMPParallelLoopTrans()
    ompr = OMPParallelTrans()

    # Put an OpenMP parallel do around the first loop in the schedule
    _, _ = ompl.apply(schedule.children[0])

    # Attempt to put an OpenMP parallel region around that same loop
    # (which is now a child of an OpenMP loop directive) and the
    # second loop in the schedule
    with pytest.raises(TransformationError):
        _, _ = ompr.apply(
            [schedule.children[0].children[0], schedule.children[1]])

def test_omp_do_update():
    '''Check the OMPDoDirective update function.'''
    psy, invoke = get_invoke("imperfect_nest.f90", api=API, idx=0)
    schedule = invoke.schedule
    par_trans = OMPParallelTrans()
    loop_trans = OMPLoopTrans()
    new_sched, _ = par_trans.apply(schedule[0].loop_body[1]
                                   .else_body[0].else_body[0])
    new_sched, _ = loop_trans.apply(new_sched[0].loop_body[1]
                                    .else_body[0].else_body[0].dir_body[0])
    gen_code = str(psy.gen).lower()

    correct = '''!$omp parallel default(shared), private(ji,jj)
!$omp do schedule(static)
do jj = 1, jpj, 1
  do ji = 1, jpi, 1
    zdkt(ji, jj) = (ptb(ji, jj, jk - 1, jn) - ptb(ji, jj, jk, jn)) * \
wmask(ji, jj, jk)
  end do
end do
!$omp end do
!$omp end parallel'''
    assert correct in gen_code

    directive = new_sched[0].loop_body[1].else_body[0].else_body[0]\
        .dir_body[0]
    assert isinstance(directive, OMPDoDirective)

    # Call update a second time and make sure that this does not
    # trigger the whole update process again, and we get the same ast
    old_ast = directive.ast
    directive.update()
    assert directive.ast is old_ast

    # Remove the existing AST, so we can do more tests:
    directive.ast = None
    # Make the schedule invalid by adding a second child to the
    # OMPDoDirective
    directive.dir_body.children.append(new_sched[0].loop_body[3])

    with pytest.raises(GenerationError) as err:
        _ = directive.update()
    assert ("An OpenMP DO can only be applied to a single loop but "
            "this Node has 2 children:" in str(err.value))

def trans(psy):
    '''PSyclone transformation script for the dynamo0p3 API that applies
    loop colouring and OpenMP parallel loop parallelisation. It also
    outputs a textual representation of the transformed PSyIR.

    :param psy: a PSyclone PSy object which captures the algorithm and \
        kernel information required by PSyclone.
    :type psy: subclass of :py:class:`psyclone.psyGen.PSy`

    '''
    otrans = DynamoOMPParallelLoopTrans()
    ctrans = Dynamo0p3ColourTrans()
    ptrans = OMPParallelTrans()
    ltrans = Dynamo0p3OMPLoopTrans()
    const = LFRicConstants()

    for invoke in psy.invokes.invoke_list:
        schedule = invoke.schedule

        # Colour any loops that need colouring
        for loop in schedule.walk(Loop):
            if (loop.field_space.orig_name
                    not in const.VALID_DISCONTINUOUS_NAMES
                    and loop.iteration_space == "cell_column"):
                ctrans.apply(loop)

        # Add OpenMP parallel do directives to the loops
        for loop in schedule.walk(Loop):
            try:
                if loop.reductions():
                    # Make sure reductions are reproducible
                    ptrans.apply(loop)
                    ltrans.apply(loop, {"reprod": True})
                else:
                    otrans.apply(loop)
            except TransformationError as info:
                print(str(info.value))

        # Take a look at what we've done
        schedule.view()

    return psy

def test_omp_region_with_children_of_different_types():
    ''' Test that we can generate code if we have an OpenMP parallel region
    enclosing children of different types. '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompl = GOceanOMPLoopTrans()
    ompr = OMPParallelTrans()

    # Put an OpenMP do directive around one loop
    omp_schedule, _ = ompl.apply(schedule.children[1])

    # Now enclose all of the children within a parallel region
    schedule, _ = ompr.apply(omp_schedule.children)

    # Replace the original loop schedule with the transformed one
    invoke.schedule = schedule

    # Attempt to generate the transformed code
    _ = psy.gen

def test_omp_parallel():
    ''' Check insertion of an OpenMP parallel region containing a single,
    explicit loop. '''
    from psyclone.transformations import OMPParallelTrans
    otrans = OMPParallelTrans()
    _, invoke_info = parse(os.path.join(BASE_PATH, "explicit_do.f90"),
                           api=API, line_length=False)
    psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
    schedule = psy.invokes.get('explicit_do').schedule
    schedule, _ = otrans.apply([schedule.children[0]])
    gen_code = str(psy.gen).lower()
    assert ("  !$omp parallel default(shared), private(jk,jj,ji)\n"
            "  do jk = 1, jpk\n"
            "    do jj = 1, jpj\n"
            "      do ji = 1, jpi\n"
            "        umask(ji, jj, jk) = ji * jj * jk / r\n"
            "      end do\n"
            "    end do\n"
            "  end do\n"
            "  !$omp end parallel\n" in gen_code)

def test_omp_parallel_multi():
    ''' Check insertion of an OpenMP parallel region containing more than
    one node. '''
    from psyclone.transformations import OMPParallelTrans
    from psyclone.psyGen import OMPParallelDirective
    otrans = OMPParallelTrans()
    _, invoke_info = parse(os.path.join(BASE_PATH, "imperfect_nest.f90"),
                           api=API, line_length=False)
    psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
    schedule = psy.invokes.get('imperfect_nest').schedule
    schedule.view()
    # Apply the OMP Parallel transformation so as to enclose the last two
    # loop nests (Python's slice notation is such that the expression below
    # gives elements 2-3).
    new_sched, _ = otrans.apply(schedule[0].loop_body[2:4])
    new_sched.view()
    gen_code = str(psy.gen).lower()

    assert ("  !$omp parallel default(shared), private(ji,jj,zabe1,zcof1,"
            "zmsku)\n"
            "  do jj = 1, jpjm1\n"
            "    do ji = 1, fs_jpim1\n"
            "      zabe1 = pahu(ji, jj, jk) * e2_e1u(ji, jj) * "
            "e3u_n(ji, jj, jk)\n" in gen_code)

    assert ("  do jj = 2, jpjm1\n"
            "    do ji = fs_2, fs_jpim1\n"
            "      pta(ji, jj, jk, jn) = pta(ji, jj, jk, jn) + "
            "zsign * (zftu(ji, jj, jk) - zftu(ji - 1, jj, jk) + "
            "zftv(ji, jj, jk) - zftv(ji, jj - 1, jk)) * r1_e1e2t(ji, jj) / "
            "e3t_n(ji, jj, jk)\n"
            "    end do\n"
            "  end do\n"
            "  !$omp end parallel\n" in gen_code)

    directive = new_sched[0].loop_body[2]
    assert isinstance(directive, OMPParallelDirective)

    # Check that further calls to the update() method don't change the
    # stored AST.
    old_ast = directive.ast
    directive.update()
    assert old_ast is directive.ast

def test_omp_parallel_errs():
    ''' Check that we raise the expected errors when incorrectly attempting
    to add an OpenMP parallel region containing more than one node. '''
    from psyclone.transformations import OMPParallelTrans
    otrans = OMPParallelTrans()
    _, invoke_info = parse(os.path.join(BASE_PATH, "imperfect_nest.f90"),
                           api=API, line_length=False)
    psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
    schedule = psy.invokes.get('imperfect_nest').schedule
    schedule.view()
    # Apply the OMP Parallel transformation so as to enclose the last two
    # loop nests (Python's slice notation is such that the expression below
    # gives elements 2-3).
    new_sched, _ = otrans.apply(schedule.children[0].children[2:4])
    directive = new_sched.children[0].children[2]
    # Break the AST by deleting some of it
    _ = new_sched.children[0]._ast.content.remove(directive._children[0]._ast)
    with pytest.raises(InternalError) as err:
        _ = psy.gen
    assert ("Failed to find locations to insert begin/end directives"
            in str(err))

def trans(psy):
    ''' PSyclone transformation script for the dynamo0p3 API to apply
    loop fusion and OpenMP for a particular example.'''
    otrans = OMPParallelTrans()
    ltrans = Dynamo0p3OMPLoopTrans()
    ftrans = LFRicLoopFuseTrans()
    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule

    config = Config.get()
    if config.api_conf("dynamo0.3").compute_annexed_dofs and \
            config.distributed_memory:
        # We can't loop fuse as the loop bounds differ, so add
        # OpenMP parallel do directives to the loops
        otrans.apply(schedule.children[0])
        otrans.apply(schedule.children[1])
    else:
        # Loop fuse the two built-in kernels. The 'same_space' flag needs
        # to be set as built-ins are over ANY_SPACE.
        ftrans.apply(schedule[0], schedule[1], {"same_space": True})

        # Add an OpenMP do directive to the resultant loop-fused loop,
        # specifying that we want reproducible reductions
        ltrans.apply(schedule.children[0], {"reprod": True})

        # Add an OpenMP parallel directive around the OpenMP do directive
        otrans.apply(schedule.children[0])

    # Take a look at what we've done
    schedule.view()
    schedule.dag()
    return psy

def test_omp_region_before_loops_trans():
    ''' Test of the OpenMP PARALLEL region transformation where we do the
    region transformation before the loop transformations. '''
    psy, invoke = get_invoke("single_invoke_two_kernels.f90", 0)
    schedule = invoke.schedule

    # Put all of the loops in the schedule within a single OpenMP region
    ompr = OMPParallelTrans()
    omp_schedule, _ = ompr.apply(schedule.children)

    # Put an OpenMP do directive around each loop contained in the region
    ompl = GOceanOMPLoopTrans()
    for child in omp_schedule.children[0].children:
        schedule, _ = ompl.apply(child)
        omp_schedule = schedule

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule

    # Store the results of applying this code transformation as a string
    gen = str(psy.gen)

    # Iterate over the lines of generated code
    omp_region_idx = -1
    omp_do_idx = -1
    for idx, line in enumerate(gen.split('\n')):
        if '!$omp parallel default' in line:
            omp_region_idx = idx
        if '!$omp do' in line:
            omp_do_idx = idx
        if 'DO j=' in line:
            break

    assert omp_region_idx != -1
    assert omp_do_idx != -1
    assert omp_do_idx - omp_region_idx == 1