def test_no_parent_accdirective():
    ''' Check that the Extract transformation rejects a node taken from
    inside an ACC Parallel region (an ACCLoopDirective separated from its
    ACCParallelDirective ancestor): a TransformationError carrying the
    expected message must be raised.

    '''
    extract_trans = GOceanExtractTrans()
    loop_trans = ACCLoopTrans()
    parallel_trans = ACCParallelTrans()
    enter_data_trans = ACCEnterDataTrans()

    _, invoke = get_invoke("single_invoke_three_kernels.f90",
                           GOCEAN_API, idx=0, dist_mem=False)
    sched = invoke.schedule

    # Decorate every Loop that is an immediate child of the Schedule with
    # an OpenACC Loop directive.
    for node in sched.children:
        if not isinstance(node, Loop):
            continue
        loop_trans.apply(node)
    # Wrap all of the Schedule's children in a single ACC Parallel region.
    parallel_trans.apply(sched.children)
    # The enter-data directive is mandatory, so add that too.
    enter_data_trans.apply(sched)

    # Pick out the first node inside the second child of the Schedule and
    # attempt to extract it on its own.
    orphan = sched.children[1].children[0]
    expected_msg = ("Error in GOceanExtractTrans: Application to Nodes "
                    "enclosed "
                    "within a thread-parallel region is not allowed.")
    with pytest.raises(TransformationError) as excinfo:
        _, _ = extract_trans.apply(orphan)
    assert expected_msg in str(excinfo.value)
def trans(psy):
    ''' Apply OpenACC transformations to the schedule of the invoke named
    'invoke_0_inc_field' in the supplied psy object.

    :param psy: the PSy object containing the invoke to transform.

    :returns: the supplied psy object (its schedule having been \
        transformed).

    '''
    parallel_trans = ACCParallelTrans()
    loop_trans = ACCLoopTrans()
    enter_data_trans = ACCEnterDataTrans()
    routine_trans = ACCRoutineTrans()

    sched = psy.invokes.get('invoke_0_inc_field').schedule
    # Display the schedule before any transformation is applied.
    sched.view()

    # Every Loop that is an immediate child of the schedule gets an
    # OpenACC Loop directive with the 'collapse' option set to 2.
    for node in sched.children:
        if not isinstance(node, Loop):
            continue
        loop_trans.apply(node, {"collapse": 2})

    # A single parallel region encloses all children of the schedule.
    parallel_trans.apply(sched.children)

    # Add the enter-data directive.
    enter_data_trans.apply(sched)

    # Each coded kernel gets an 'acc routine' directive.
    for kernel in sched.coded_kernels():
        routine_trans.apply(kernel)
        # Module-inlining the kernel at this point would avoid relying on
        # the compiler to do it but that is not currently possible for
        # the fparser2 AST (issue #229).

    # Display the transformed schedule.
    sched.view()
    return psy
def collapse_loops(nodes):
    '''
    Searches the supplied list of nodes and applies an ACC LOOP COLLAPSE(2)
    directive to any perfectly-nested lon-lat loops.

    A loop is transformed only if it is of type "lat", has a Literal step,
    and its body consists of exactly one child which is a Loop of type
    "lon" that also has a Literal step. Loops that already have an
    ACCLoopDirective ancestor are skipped.

    :param nodes: list of nodes to search for loops.
    :type nodes: list of :py:class:`psyclone.psyir.nodes.Node`

    '''
    loop_trans = ACCLoopTrans()

    for node in nodes:
        for loop in node.walk(Loop):
            if loop.ancestor(ACCLoopDirective):
                # We've already transformed a parent Loop so skip this one.
                continue

            # The outer loop must be over latitude and have a Literal
            # value for its step. The latter condition is necessary to
            # avoid compiler errors with 20.7.
            if loop.loop_type != "lat" or \
               not isinstance(loop.step_expr, Literal):
                continue

            # Check the number of children *before* indexing into the
            # loop body so that a loop with an empty body is skipped
            # rather than causing an IndexError.
            if len(loop.loop_body.children) != 1:
                continue

            inner = loop.loop_body[0]
            if isinstance(inner, Loop) and \
               inner.loop_type == "lon" and \
               isinstance(inner.step_expr, Literal):
                loop_trans.apply(loop, {"collapse": 2})
def test_no_parent_accdirective():
    ''' Test that applying Extract Transformation on an orphaned
    ACCLoopDirective without its ancestor ACCParallelDirective
    when optimisations are applied raises a TransformationError. '''
    from psyclone.transformations import ACCParallelTrans, \
        ACCEnterDataTrans, ACCLoopTrans

    etrans = GOceanExtractRegionTrans()
    acclpt = ACCLoopTrans()
    accpara = ACCParallelTrans()
    accdata = ACCEnterDataTrans()

    _, invoke_info = parse(os.path.join(GOCEAN_BASE_PATH,
                                        "single_invoke_three_kernels.f90"),
                           api=GOCEAN_API)
    psy = PSyFactory(GOCEAN_API, distributed_memory=False).create(invoke_info)
    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule

    # Apply the OpenACC Loop transformation to every loop in the Schedule.
    # (The list of children being iterated over is obtained once, before
    # 'schedule' is re-bound to the value returned by the transformation.)
    for child in schedule.children:
        if isinstance(child, Loop):
            schedule, _ = acclpt.apply(child)
    # Enclose all of these loops within a single ACC Parallel region
    schedule, _ = accpara.apply(schedule.children)
    # Add a mandatory ACC enter-data directive
    schedule, _ = accdata.apply(schedule)

    orphaned_directive = schedule.children[1].children[0]
    with pytest.raises(TransformationError) as excinfo:
        _, _ = etrans.apply(orphaned_directive)
    # Compare against the text of the raised exception itself
    # (excinfo.value) rather than the string form of the pytest
    # ExceptionInfo wrapper, which is not guaranteed to contain the
    # full message.
    assert ("Extraction of Nodes enclosed within a thread parallel "
            "region is not allowed.") in str(excinfo.value)
def test_loop_after_implicit_kernels(parser):
    ''' Check that a loop directive can be added to an explicit loop that
    follows an array assignment (implicit loop) inside a kernels region.

    :param parser: callable used to parse the Fortran source (supplied \
        to the test as an argument).

    '''
    source = ("program two_loops\n"
              " integer :: ji\n"
              " real :: array(10,10)\n"
              " array(:,:) = -1.0\n"
              " do ji = 1, 5\n"
              " array(ji,1) = 2.0*array(ji,2)\n"
              " end do\n"
              "end program two_loops\n")
    code = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(code)
    schedule = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    acc_loop_trans = ACCLoopTrans()

    # Enclose the first two nodes of the schedule in a kernels region and
    # then put a loop directive on the second node within that region.
    kernels_trans.apply(schedule[0:2])
    acc_loop_trans.apply(schedule[0].dir_body[1])

    generated = str(psy.gen).lower()
    region_start = (" !$acc kernels\n"
                    " array(:, :) = - 1.0\n"
                    " !$acc loop independent\n"
                    " do ji = 1, 5\n")
    region_end = (" end do\n"
                  " !$acc end kernels\n"
                  "end program")
    assert region_start in generated
    assert region_end in generated
def test_accloop():
    ''' Basic checks on ACCLoopTrans: its name, its string form and the
    type of directive that it creates. '''
    acc_loop_trans = ACCLoopTrans()
    assert acc_loop_trans.name == "ACCLoopTrans"
    assert str(acc_loop_trans) == "Adds an 'OpenACC loop' directive to a loop"

    # The directive is built from a list containing a single Statement.
    child = Statement()
    directive = acc_loop_trans._directive([child])
    assert isinstance(directive, ACCLoopDirective)
def test_accloop():
    ''' Basic checks on ACCLoopTrans: its name, its string form and the
    type of directive that it creates. '''
    from psyclone.transformations import ACCLoopTrans
    from psyclone.psyGen import ACCLoopDirective

    acc_loop_trans = ACCLoopTrans()
    assert acc_loop_trans.name == "ACCLoopTrans"
    assert str(acc_loop_trans) == "Adds an 'OpenACC loop' directive to a loop"

    # The directive is built from a parent Node and a list containing a
    # single Statement.
    parent = Node()
    child = Statement()
    directive = acc_loop_trans._directive(parent, [child])
    assert isinstance(directive, ACCLoopDirective)
def test_loop_inside_kernels(parser):
    ''' Check that an ACC LOOP directive can be placed on a loop that is
    inside a KERNELS region.

    :param parser: callable used to parse the Fortran source (supplied \
        to the test as an argument).

    '''
    reader = FortranStringReader(EXPLICIT_LOOP)
    psy = PSyFactory(API, distributed_memory=False).create(parser(reader))
    schedule = psy.invokes.invoke_list[0].schedule

    # Put the first node of the schedule inside a kernels region...
    kernels_trans = ACCKernelsTrans()
    kernels_trans.apply([schedule[0]])
    # ...and then add a loop directive to the first node in that region.
    acc_loop_trans = ACCLoopTrans()
    acc_loop_trans.apply(schedule[0].dir_body[0])

    generated = str(psy.gen).lower()
    region_start = (" !$acc kernels\n"
                    " !$acc loop independent\n"
                    " do ji = 1, jpj\n")
    region_end = (" end do\n"
                  " !$acc end kernels\n")
    assert region_start in generated
    assert region_end in generated
def trans(psy):
    ''' Apply OpenACC transformations to the schedule of the first invoke
    in the supplied psy object.

    :param psy: the PSy object containing the invoke to transform.

    :returns: the supplied psy object, with the schedule of its first \
        invoke replaced by the transformed one.

    '''
    from psyclone.psyir.nodes import Loop
    from psyclone.transformations import ACCParallelTrans, \
        ACCEnterDataTrans, ACCLoopTrans, ACCRoutineTrans, \
        KernelGlobalsToArguments

    parallel_trans = ACCParallelTrans()
    loop_trans = ACCLoopTrans()
    enter_data_trans = ACCEnterDataTrans()
    routine_trans = ACCRoutineTrans()
    globals_trans = KernelGlobalsToArguments()

    invoke = psy.invokes.invoke_list[0]
    sched = invoke.schedule
    sched.view()

    # Every Loop that is an immediate child of the schedule gets an
    # OpenACC Loop directive with the 'collapse' option set to 2. The
    # list of children is obtained once, before 'sched' is re-bound to
    # the schedule returned by the transformation.
    for node in sched.children:
        if not isinstance(node, Loop):
            continue
        sched, _ = loop_trans.apply(node, {"collapse": 2})

    # A single parallel region encloses all children of the schedule.
    _, _ = parallel_trans.apply(sched.children)

    # Add the enter-data directive. The schedule returned here is the one
    # that is finally stored back on the invoke.
    new_sched, _ = enter_data_trans.apply(sched)

    # Turn accesses to global data into kernel arguments (for one named
    # kernel only) and give every coded kernel an 'acc routine' directive.
    for kernel in sched.coded_kernels():
        if kernel.name == "kern_use_var_code":
            # TODO #490 and #663. This currently won't work because the
            # KernelGlobalsToArguments transformation works on the PSyIR
            # but the subsequent ACCRoutineTrans works on the fparser2
            # parse tree.
            globals_trans.apply(kernel)
        _, _ = routine_trans.apply(kernel)
        # Module-inlining the kernel at this point would avoid relying on
        # the compiler to do it but that is not currently possible for
        # the fparser2 AST (issue #229).

    invoke.schedule = new_sched
    new_sched.view()
    return psy
def trans(psy):
    '''PSyclone transformation script for the dynamo0p3 api. For every
    invoke it colours loops where required, applies the OpenACC kernels
    and loop transformations to the loops, the OpenACC routine
    transformation to the coded kernels and, finally, the OpenACC enter
    data transformation to the whole schedule.

    :param psy: a PSyclone PSy object which captures the algorithm and \
        kernel information required by PSyclone.
    :type psy: subclass of :py:class:`psyclone.psyGen.PSy`

    :returns: the supplied psy object (its schedules having been \
        transformed).

    '''
    acc_kernels = ACCKernelsTrans()
    acc_routine = ACCRoutineTrans()
    colour_trans = Dynamo0p3ColourTrans()
    acc_loop = ACCLoopTrans()
    acc_enter_data = ACCEnterDataTrans()
    constants = LFRicConstants()

    for invoke in psy.invokes.invoke_list:

        sched = invoke.schedule

        # Colour any loop over "cell_column" whose field space is not one
        # of the names listed as discontinuous.
        for loop in sched.loops():
            if loop.field_space.orig_name in \
               constants.VALID_DISCONTINUOUS_NAMES:
                continue
            if loop.iteration_space == "cell_column":
                colour_trans.apply(loop)

        # Every loop apart from those of type "colours" is enclosed in a
        # Kernels region and given a Loop directive.
        for loop in sched.loops():
            if loop.loop_type == "colours":
                continue
            acc_kernels.apply([loop])
            acc_loop.apply(loop)

        # Each coded kernel gets a Routine directive.
        for kern in sched.coded_kernels():
            acc_routine.apply(kern)

        # A single Enter Data directive covers all of the PSy layer.
        acc_enter_data.apply(sched)

        # Display the transformed schedule.
        sched.view()

    return psy
def trans(psy):
    ''' Apply OpenACC transformations to the schedule of the first invoke
    in the supplied psy object and module-inline its kernels.

    :param psy: the PSy object containing the invoke to transform.

    :returns: the supplied psy object, with the schedule of its first \
        invoke replaced by the transformed one.

    '''
    from psyclone.psyir.nodes import Loop
    from psyclone.transformations import ACCParallelTrans, \
        ACCEnterDataTrans, ACCLoopTrans, ACCRoutineTrans, \
        KernelGlobalsToArguments, KernelModuleInlineTrans

    parallel_trans = ACCParallelTrans()
    loop_trans = ACCLoopTrans()
    enter_data_trans = ACCEnterDataTrans()
    routine_trans = ACCRoutineTrans()
    inline_trans = KernelModuleInlineTrans()
    globals_trans = KernelGlobalsToArguments()

    invoke = psy.invokes.invoke_list[0]
    sched = invoke.schedule
    sched.view()

    # Every Loop that is an immediate child of the schedule gets an
    # OpenACC Loop directive with the 'collapse' option set to 2. The
    # list of children is obtained once, before 'sched' is re-bound to
    # the schedule returned by the transformation.
    for node in sched.children:
        if not isinstance(node, Loop):
            continue
        sched, _ = loop_trans.apply(node, {"collapse": 2})

    # A single parallel region encloses all children of the schedule.
    _, _ = parallel_trans.apply(sched.children)

    # Add the enter-data directive. The schedule returned here is the one
    # that is finally stored back on the invoke.
    new_sched, _ = enter_data_trans.apply(sched)

    # For each coded kernel: turn accesses to global data into kernel
    # arguments (for one named kernel only), add an 'acc routine'
    # directive and then module-inline it.
    for kernel in sched.coded_kernels():
        if kernel.name == "kern_use_var_code":
            globals_trans.apply(kernel)
        _, _ = routine_trans.apply(kernel)
        _, _ = inline_trans.apply(kernel)

    invoke.schedule = new_sched
    new_sched.view()
    return psy
def trans(psy):
    '''PSyclone transformation script for the dynamo0p3 api which, for
    every invoke, applies the OpenACC loop and parallel transformations
    for each loop and then the OpenACC enter data transformation to the
    schedule.

    :param psy: the PSy object whose invokes are to be transformed.

    :returns: the supplied psy object (its schedules having been \
        transformed).

    '''
    acc_loop = ACCLoopTrans()
    acc_parallel = ACCParallelTrans()
    acc_enter_data = ACCEnterDataTrans()

    for invoke in psy.invokes.invoke_list:

        # Report progress on stdout.
        print("Transforming invoke '" + invoke.name + "'...")
        sched = invoke.schedule

        for loop in sched.loops():
            # Add the loop directive first; the parallel transformation
            # is then applied to whatever is the parent of the loop once
            # that has been done.
            _, _ = acc_loop.apply(loop)
            _, _ = acc_parallel.apply(loop.parent)

        # One enter data directive per schedule.
        _, _ = acc_enter_data.apply(sched)

    return psy
def trans(psy):
    ''' Apply OpenACC transformations to the schedule of the invoke named
    'invoke_0_inc_field' in the supplied psy object.

    :param psy: the PSy object containing the invoke to transform.

    :returns: the supplied psy object, with the schedule of the invoke \
        replaced by the transformed one.

    '''
    from psyclone.psyGen import Loop
    from psyclone.transformations import ACCParallelTrans, \
        ACCDataTrans, ACCLoopTrans, ACCRoutineTrans, KernelModuleInlineTrans

    parallel_trans = ACCParallelTrans()
    loop_trans = ACCLoopTrans()
    data_trans = ACCDataTrans()
    routine_trans = ACCRoutineTrans()
    # Created but not currently used since module-inlining of the kernels
    # is disabled (see below).
    inline_trans = KernelModuleInlineTrans()

    invoke = psy.invokes.get('invoke_0_inc_field')
    sched = invoke.schedule

    # Every Loop that is an immediate child of the schedule gets an
    # OpenACC Loop directive with collapse=2. The list of children is
    # obtained once, before 'sched' is re-bound to the schedule returned
    # by the transformation.
    for node in sched.children:
        if not isinstance(node, Loop):
            continue
        sched, _ = loop_trans.apply(node, collapse=2)

    # A single parallel region encloses all children of the schedule.
    _, _ = parallel_trans.apply(sched.children)

    # Apply the ACCDataTrans transformation to the schedule. The schedule
    # returned here is the one that is finally stored back on the invoke.
    new_sched, _ = data_trans.apply(sched)

    # Each kernel call gets an 'acc routine' directive.
    for kernel in sched.kern_calls():
        _, _ = routine_trans.apply(kernel)
        # Module-inlining the kernel at this point would avoid relying on
        # the compiler to do it but that is not currently possible for
        # the fparser2 AST (issue #229).

    invoke.schedule = new_sched
    new_sched.view()
    return psy
def test_two_loops_inside_kernels(parser):
    ''' Check that, for a KERNELS region containing two loops, a loop
    directive can be added to just the first loop and then to the second
    one as well.

    :param parser: callable used to parse the Fortran source (supplied \
        to the test as an argument).

    '''
    source = ("program two_loops\n"
              " integer :: ji\n"
              " real :: array(10)\n"
              " do ji = 1, 10\n"
              " array(ji) = 1.0\n"
              " end do\n"
              " do ji = 1, 5\n"
              " array(ji) = 2.0*array(ji)\n"
              " end do\n"
              "end program two_loops\n")
    code = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(code)
    schedule = psy.invokes.invoke_list[0].schedule

    # Both loops go inside a single KERNELS region.
    kernels_trans = ACCKernelsTrans()
    kernels_trans.apply(schedule[0:2])

    # The text expected at the end of the region in both of the checks
    # performed below.
    region_end = (" end do\n"
                  " !$acc end kernels\n"
                  "end program")

    # First, a loop directive on the first loop only.
    acc_loop_trans = ACCLoopTrans()
    acc_loop_trans.apply(schedule[0].dir_body[0])
    generated = str(psy.gen).lower()
    first_loop = (" !$acc kernels\n"
                  " !$acc loop independent\n"
                  " do ji = 1, 10\n")
    assert first_loop in generated
    assert region_end in generated

    # Now add a loop directive to the second loop and re-generate.
    acc_loop_trans.apply(schedule[0].dir_body[1])
    generated = str(psy.gen).lower()
    second_loop = (" !$acc loop independent\n"
                   " do ji = 1, 5\n")
    assert second_loop in generated
    assert region_end in generated