def test_profile_named_gocean1p0():
    '''Verify the instrumentation generated for the gocean 1.0 API when
    the caller supplies the name of the profiling region.

    '''
    psy, invoke = get_invoke("test11_different_iterates_over_one_invoke.f90",
                             "gocean1.0", idx=0)
    nodes = invoke.schedule.children
    transformation = ProfileTrans()
    # The region is named after the PSy layer and the invoke.
    region_options = {"region_name": (psy.name, invoke.name)}
    transformation.apply(nodes, options=region_options)

    generated = str(invoke.gen())
    expected_call = ("CALL profile_psy_data%PreStart("
                     "\"psy_single_invoke_different_iterates_over\", "
                     "\"invoke_0\", 0, 0)")
    assert expected_call in generated
def test_transform_errors(capsys):
    '''Exercise the error handling of the profile-region transformation.

    :param capsys: pytest fixture used to capture the output of view().

    '''
    # get_invoke is imported and tested elsewhere, so it is assumed to
    # behave as expected here.
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    nodes = invoke.schedule.children
    profile_trans = ProfileTrans()

    # Sanity check first: nodes supplied in the right order are accepted.
    wrapped, _ = profile_trans.apply([nodes[0], nodes[1], nodes[2]])
    wrapped.view()
    captured, _ = capsys.readouterr()
    # The captured text may be unicode, so convert it to a plain string
    # before removing the newlines.
    flattened = str(captured).replace("\n", "")
    # One Profile node followed by three outer loops.
    outer_loop = r"Loop.*\[type='outer'"
    pattern = (r".*GOInvokeSchedule.*?" + r"Profile.*?" +
               r".*?".join(3 * [outer_loop]))
    assert re.search(pattern, flattened)

    # A profile node must not end up directly inside an OMP do directive
    # (that would be invalid syntax), so start again from a fresh invoke.
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    profile_trans = ProfileTrans()
    omp_loop_trans = GOceanOMPLoopTrans()
    # Parallelise the first loop.
    omp_loop_trans.apply(schedule[0])

    # The body of an omp do directive must be a do loop only.
    with pytest.raises(TransformationError) as excinfo:
        profile_trans.apply(invoke.schedule[0].dir_body[0])
    message = str(excinfo.value)
    assert ("A PSyData node cannot be inserted between an OpenMP/ACC "
            "directive and the loop(s) to which it applies!") in message

    # A region name that is not a 2-tuple of strings is rejected.
    with pytest.raises(TransformationError) as excinfo:
        profile_trans.apply(invoke.schedule[0], {"region_name": "xx"})
    message = str(excinfo.value)
    assert ("Error in ProfileTrans. User-supplied region name must be a "
            "tuple containing two non-empty strings") in message
def add_profile_nodes(schedule, loop_class):
    '''Insert every required profiling node (for kernels and/or invokes,
    depending on what the Profiler options request) into a schedule.

    Invoke-level profiling is skipped when the schedule contains more
    than one Return, or when its single Return is not the last statement.

    :param schedule: the schedule to instrument.
    :type schedule: :py:class:`psyclone.psyGen.InvokeSchedule` or subclass
    :param loop_class: the loop class (e.g. GOLoop, DynLoop) to instrument.
    :type loop_class: :py:class:`psyclone.psyir.nodes.Loop` or subclass

    '''
    transformation = ProfileTrans()

    if Profiler.profile_kernels():
        for kernel in schedule.walk(Kern):
            # Climb from the kernel to the outermost loop of the requested
            # class, stopping as soon as a loop is not tightly nested
            # (i.e. its body has anything other than exactly one child).
            outermost = None
            candidate = kernel.ancestor(loop_class)
            while candidate and len(candidate.loop_body.children) == 1:
                outermost = candidate
                candidate = candidate.ancestor(loop_class)

            # Nothing is added inside an OpenACC region, as the PSyData
            # routines would then have to be compiled for the GPU.
            if not outermost or outermost.ancestor(ACCDirective):
                continue

            # A directive whose body holds only this loop is a directive
            # applied to the loop (as opposed to one defining a region),
            # so the directive is enclosed in the profiling region too.
            enclosing = outermost.parent.parent
            if (isinstance(enclosing, Directive) and
                    len(enclosing.dir_body.children) == 1):
                outermost = enclosing
            transformation.apply(outermost)

    if not Profiler.profile_invokes():
        return

    # Return statements cannot be included within profiling regions.
    returns = schedule.walk(Return)
    if not returns:
        transformation.apply(schedule.children)
    elif len(returns) == 1 and returns[0] is schedule.children[-1]:
        # The only Return is the final statement: leave it outside the
        # profiling region.
        transformation.apply(schedule.children[:-1])
    else:
        # TODO #11 use logging instead.
        print("Not adding profiling to routine '{0}' because it "
              "contains one or more Return statements.".format(
                  schedule.name), file=sys.stderr)
def test_region():
    '''Check the profiling transformation when it is applied to a region
    that covers only part of the invoke and holds several kernels.

    '''
    def prestart(variable, region):
        '''Build the PreStart call expected for one profiling region.'''
        return ("CALL {0}%PreStart(\"multi_functions_multi_invokes_psy\", "
                "\"{1}\", 0, 0)".format(variable, region))

    _, invoke = get_invoke("3.1_multi_functions_multi_invokes.f90",
                           "dynamo0.3", name="invoke_0", dist_mem=True)
    schedule = invoke.schedule
    profile_trans = ProfileTrans()

    # Just halo exchanges.
    profile_trans.apply(schedule[0:4])
    # Two loops.
    profile_trans.apply(schedule[1:3])
    generated = str(invoke.gen())
    for variable, region in (("profile_psy_data", "invoke_0:r0"),
                             ("profile_psy_data_1", "invoke_0:r1")):
        assert prestart(variable, region) in generated

    # Make nested profiles.
    profile_trans.apply(schedule[1].profile_body[1])
    profile_trans.apply(schedule)
    generated = str(invoke.gen())
    for variable, region in (
            ("profile_psy_data_3", "invoke_0:r0"),
            ("profile_psy_data", "invoke_0:r1"),
            ("profile_psy_data_1", "invoke_0:r2"),
            ("profile_psy_data_2", "invoke_0:testkern_code:r3")):
        assert prestart(variable, region) in generated
def test_omp_transform():
    '''Check the profiling transformation in combination with OpenMP
    parallelisation.

    '''
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    # The expected code below relies on constant loop bounds.
    schedule._const_loop_bounds = True

    profile_trans = ProfileTrans()
    omp_loop_trans = GOceanOMPLoopTrans()
    omp_parallel_trans = OMPParallelTrans()

    # Parallelise the first loop and profile the whole parallel region.
    omp_loop_trans.apply(schedule[0])
    omp_parallel_trans.apply(schedule[0])
    profile_trans.apply(schedule[0])

    outer_start = ("CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", "
                   "\"invoke_loop1:bc_ssh_code:r0\", 0, 0)")
    omp_parallel = "      !$omp parallel default(shared), private(i,j)"
    omp_do_region = [
        "      !$omp do schedule(static)",
        "      DO j=2,jstop",
        "        DO i=2,istop",
        "          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)",
        "        END DO",
        "      END DO",
        "      !$omp end do"]

    expected = "\n".join(
        ["      " + outer_start, omp_parallel] +
        omp_do_region +
        ["      !$omp end parallel",
         "      CALL profile_psy_data%PostEnd"])
    generated = str(invoke.gen())
    assert expected in generated

    # Now add another profile node between the omp parallel and omp do
    # directives.
    profile_trans.apply(schedule[0].profile_body[0].dir_body[0])
    generated = str(invoke.gen())
    inner_start = ("CALL profile_psy_data_1%PreStart("
                   "\"psy_test27_loop_swap\", "
                   "\"invoke_loop1:bc_ssh_code:r1\", 0, 0)")
    # NOTE(review): the line breaks and indentation of this expected text
    # are assumed to match the layout of the first expected block above -
    # confirm against the generated code.
    expected = "\n".join(
        [outer_start, omp_parallel, "      " + inner_start] +
        omp_do_region +
        ["      CALL profile_psy_data_1%PostEnd",
         "      !$omp end parallel",
         "      CALL profile_psy_data%PostEnd"])
    assert expected in generated
def test_multi_prefix_profile(monkeypatch):
    '''Check the profiling transformation when two different profiling
    tools (PSyData prefixes) are used within the same invoke.

    :param monkeypatch: pytest fixture used to patch the Config object.

    '''
    _, invoke = get_invoke("3.1_multi_functions_multi_invokes.f90",
                           "dynamo0.3", name="invoke_0", dist_mem=True)
    schedule = invoke.schedule
    profile_trans = ProfileTrans()
    # Monkeypatch the list of recognised PSyData prefixes.
    monkeypatch.setattr(Config.get(), "_valid_psy_data_prefixes",
                        ["profile", "tool1"])

    # The region around the halo exchanges uses the 'tool1' prefix...
    profile_trans.apply(schedule[0:4], options={"prefix": "tool1"})
    # ...while the two loops use the default prefix.
    profile_trans.apply(schedule[1:3])
    generated = str(invoke.gen())

    expected_fragments = (
        # Both PSyData modules are imported.
        "      USE profile_psy_data_mod, ONLY: profile_PSyDataType\n",
        "      USE tool1_psy_data_mod, ONLY: tool1_PSyDataType",
        # One variable is declared for each tool.
        "      TYPE(profile_PSyDataType), target, save :: "
        "profile_psy_data\n"
        "      TYPE(tool1_PSyDataType), target, save :: tool1_psy_data",
        # Start of the 'tool1' region.
        "      ! Call kernels and communication routines\n"
        "      !\n"
        "      CALL tool1_psy_data%PreStart(\"multi_functions_multi_"
        "invokes_psy\", \"invoke_0:r0\", 0, 0)\n"
        "      IF (f1_proxy%is_dirty(depth=1)) THEN\n",
        # End of the 'tool1' region and start of the default one.
        "      CALL tool1_psy_data%PostEnd\n"
        "      CALL profile_psy_data%PreStart(\"multi_functions_multi_"
        "invokes_psy\", \"invoke_0:r1\", 0, 0)\n"
        "      DO cell=1,mesh%get_last_halo_cell(1)",
        # End of the default region.
        "      CALL f1_proxy%set_dirty()\n"
        "      !\n"
        "      CALL profile_psy_data%PostEnd\n"
        "      DO cell=1,mesh%get_last_halo_cell(1)")
    for fragment in expected_fragments:
        assert fragment in generated
def trans(psy):
    '''PSyclone-script compliant transformation function that adds
    profiling to each invoke.

    The names of all invokes are printed first. Every invoke that has a
    schedule then gets all of that schedule's children enclosed in one
    profile region, after which the schedule is displayed.

    :param psy: the PSy layer object to apply transformations to.
    :type psy: :py:class:`psyclone.psyGen.PSy`

    '''
    invoke_names = "\n".join(str(name) for name in psy.invokes.names)
    print("Invokes found:\n{0}\n".format(invoke_names))

    profile_trans = ProfileTrans()
    for invoke in psy.invokes.invoke_list:
        schedule = invoke.schedule
        if schedule:
            # Enclose all children of the schedule within a single
            # profile region.
            profile_trans.apply(schedule.children)
            schedule.view()
        else:
            print("Invoke {0} has no Schedule! Skipping...".format(
                invoke.name))
def trans(psy):
    '''Add OpenACC directives to the supplied PSy object and then put the
    whole schedule of the 'invoke_0_inc_field' invoke inside a single
    profiling region.

    :param psy: the PSy layer to transform.
    :type psy: :py:class:`psyclone.gocean1p0.GOPSy`

    :returns: the transformed PSy object.
    :rtype: :py:class:`psyclone.gocean1p0.GOPSy`

    '''
    profile_trans = ProfileTrans()

    # The trans() routine in acc_transform.py adds the OpenACC directives.
    psy = acc_trans(psy)

    sched = psy.invokes.get('invoke_0_inc_field').schedule
    # Enclose everything in a profiling region.
    profile_trans.apply(sched.children)
    sched.view()

    return psy
def add_profile_nodes(schedule, loop_class):
    '''Insert every required profiling node (for kernels and/or invokes,
    depending on what the Profiler options request) into a schedule.

    :param schedule: the schedule to instrument.
    :type schedule: :py:class:`psyclone.psyGen.InvokeSchedule` or
                    derived class
    :param loop_class: the loop class (e.g. GOLoop, DynLoop) to instrument.
    :type loop_class: :py:class:`psyclone.psyir.nodes.Loop` or
                      derived class.

    '''
    from psyclone.psyir.transformations import ProfileTrans
    transformation = ProfileTrans()

    if Profiler.profile_kernels():
        # Kernel profiling wraps each immediate child of the schedule
        # that is a loop of the requested class.
        for child in schedule.children:
            if not isinstance(child, loop_class):
                continue
            transformation.apply(child)

    if Profiler.profile_invokes():
        # Invoke profiling wraps all children of the schedule at once.
        transformation.apply(schedule.children)
def test_no_psydata_in_kernels(parser, monkeypatch):
    '''Check that code generation is refused when an OpenACC kernels
    region contains PSyData calls.

    :param parser: fixture that parses the supplied Fortran reader.
    :param monkeypatch: pytest fixture used to disable validation.

    '''
    def _skip_validation(_nodes, _options):
        '''Stand-in for validate() that accepts anything.'''
        return None

    parse_tree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    schedule = psy.invokes.invoke_list[0].schedule
    profile_trans = ProfileTrans()
    ACCKernelsTrans().apply(schedule[0])

    # Attempt to put a profiling call within the loop.
    first_assignment = schedule.walk(Assignment)[0]
    with pytest.raises(TransformationError) as err:
        profile_trans.apply(first_assignment)
    expected = "A PSyData node cannot be inserted inside an OpenACC region"
    assert expected in str(err.value)

    # With validate() patched out, the transformation goes ahead...
    monkeypatch.setattr(profile_trans, "validate", _skip_validation)
    profile_trans.apply(first_assignment)

    # ...so the problem must instead be caught at code-generation time.
    with pytest.raises(GenerationError) as err:
        _ = psy.gen
    expected = ("Cannot include calls to PSyData routines within OpenACC "
                "regions")
    assert expected in str(err.value)
representation of NEMO code to insert profiling calls.

'''

from __future__ import absolute_import, print_function
import re
import pytest
from fparser.common.readfortran import FortranStringReader
from psyclone.errors import InternalError
from psyclone.configuration import Config
from psyclone.psyGen import PSyFactory
from psyclone.psyir.nodes import PSyDataNode
from psyclone.psyir.transformations import ProfileTrans, TransformationError


# The transformation that most of these tests use
PTRANS = ProfileTrans()


@pytest.fixture(scope="module", autouse=True)
def setup():
    '''Module-scoped, automatically-used fixture that selects the nemo
    API before the tests in this file run and discards the Config
    instance once they have finished.

    '''
    Config.get().api = "nemo"
    # Everything after the yield runs as teardown for the module.
    # NOTE(review): `yield()` yields an empty tuple rather than None; a
    # bare `yield` is the more usual form for a fixture with no value.
    yield()
    # At the end of all tests make sure that we wipe the Config object
    # so we get a fresh/default one for any further test (and not a
    # left-over one from a test here).
    Config._instance = None
def test_profile_basic(capsys):
    '''Check the basic functionality: node names and the schedule view.

    :param capsys: pytest fixture used to capture the output of view().

    '''
    def literal(value):
        '''Text expected for one integer Literal node.'''
        return "Literal[value:'{0}', Scalar<INTEGER, UNDEFINED>]".format(
            value)

    def loop_head(variable, loop_type, start, stop):
        '''Text lines expected from a GOLoop down to its Schedule.'''
        return ["GOLoop[id:'', variable:'{0}', loop_type:'{1}']".format(
                    variable, loop_type),
                literal(start), literal(stop), literal("1"),
                "Schedule:"]

    loop_tail = ["End Schedule", "End GOLoop"]

    Profiler.set_options([Profiler.INVOKES])
    _, invoke = get_invoke("test11_different_iterates_over_one_invoke.f90",
                           "gocean1.0", idx=0, dist_mem=False)
    # This test expects constant loop bounds.
    invoke.schedule._const_loop_bounds = True
    Profiler.add_profile_nodes(invoke.schedule, Loop)
    assert isinstance(invoke.schedule[0], ProfileNode)

    invoke.schedule.view()
    out, _ = capsys.readouterr()

    gsched = colored("GOInvokeSchedule", GOInvokeSchedule._colour)
    sched = colored("Schedule", Schedule._colour)
    loop = Loop().coloured_name(True)
    profile = invoke.schedule[0].coloured_name(True)

    # One check is based on the schedule view, to make sure that the
    # colouring and indentation are correct.
    expected = "".join([
        gsched, "[invoke='invoke_0', Constant loop bounds=True]\n",
        "    0: ", profile, "[]\n",
        "        ", sched, "[]\n",
        "            0: ", loop,
        "[type='outer', field_space='go_cv', ",
        "it_space='go_internal_pts']\n"])
    assert expected in out

    # Insert a profile call between the outer and the inner loop. This
    # checks that the subroutine node is found even when it is not the
    # immediate parent.
    profile_trans = ProfileTrans()
    new_sched, _ = profile_trans.apply(
        invoke.schedule[0].profile_body[0].loop_body[0])
    new_sched_str = str(new_sched)

    # NOTE(review): the expected text is assembled with one un-indented
    # entry per line; confirm this matches the layout that str() of the
    # schedule produces.
    correct = "\n".join(
        ["GOInvokeSchedule[invoke='invoke_0', Constant loop bounds=True]:",
         "ProfileStart[var=profile_psy_data]"] +
        loop_head("j", "outer", "2", "jstop-1") +
        ["ProfileStart[var=profile_psy_data_1]"] +
        loop_head("i", "inner", "2", "istop") +
        ["kern call: compute_cv_code"] +
        loop_tail +
        ["ProfileEnd"] +
        loop_tail +
        loop_head("j", "outer", "1", "jstop+1") +
        loop_head("i", "inner", "1", "istop+1") +
        ["kern call: bc_ssh_code"] +
        loop_tail +
        loop_tail +
        ["ProfileEnd",
         "End Schedule"])
    assert correct in new_sched_str

    Profiler.set_options(None)
def test_transform(capsys):
    '''Check the normal behaviour of the profile-region transformation.

    :param capsys: pytest fixture used to capture the output of view().

    '''
    # pylint: disable=too-many-locals
    literal = "Literal[value:'{0}', Scalar<INTEGER, UNDEFINED>]"

    def loop_nest(kernel, outer_bounds, inner_bounds):
        '''Text lines expected for a j/i loop nest around one kernel.'''
        lines = []
        for variable, loop_type, bounds in (("j", "outer", outer_bounds),
                                            ("i", "inner", inner_bounds)):
            lines.append("GOLoop[id:'', variable:'{0}', "
                         "loop_type:'{1}']".format(variable, loop_type))
            lines.extend(literal.format(value)
                         for value in (bounds[0], bounds[1], "1"))
            lines.append("Schedule:")
        lines.append("kern call: " + kernel)
        lines.extend(2 * ["End Schedule", "End GOLoop"])
        return lines

    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    # This test expects constant loop bounds.
    schedule._const_loop_bounds = True

    prt = ProfileTrans()
    assert str(prt) == ("Create a sub-tree of the PSyIR that has "
                        "a node of type ProfileNode at its root.")
    assert prt.name == "ProfileTrans"

    # NOTE(review): the expected str() output below is assembled with one
    # un-indented entry per line; confirm this matches the layout that
    # str() of the schedule produces.
    header = ("GOInvokeSchedule[invoke='invoke_loop1', "
              "Constant loop bounds=True]:")
    ssh_nest = loop_nest("bc_ssh_code", ("2", "jstop"), ("2", "istop"))
    solid_u_nest = loop_nest("bc_solid_u_code",
                             ("1", "jstop+1"), ("1", "istop"))
    solid_v_nest = loop_nest("bc_solid_v_code",
                             ("1", "jstop"), ("1", "istop+1"))

    # Try applying it to a list.
    sched1, _ = prt.apply(schedule.children)
    correct = "\n".join(
        [header, "ProfileStart[var=profile_psy_data]"] +
        ssh_nest + solid_u_nest + solid_v_nest +
        ["ProfileEnd", "End Schedule"])
    assert correct in str(sched1)

    # Now only wrap a single node - the middle loop.
    sched2, _ = prt.apply(schedule[0].profile_body[1])
    correct = "\n".join(
        [header, "ProfileStart[var=profile_psy_data]"] +
        ssh_nest +
        ["ProfileStart[var=profile_psy_data_1]"] +
        solid_u_nest +
        ["ProfileEnd"] +
        solid_v_nest +
        ["ProfileEnd", "End Schedule"])
    assert correct in str(sched2)

    # Check that a sublist created from individual elements can be
    # wrapped.
    sched3, _ = prt.apply(
        [sched2[0].profile_body[0], sched2[0].profile_body[1]])
    sched3.view()
    out, _ = capsys.readouterr()

    gsched = colored("GOInvokeSchedule", GOInvokeSchedule._colour)
    prof = colored("Profile", ProfileNode._colour)
    sched = colored("Schedule", Schedule._colour)
    loop = colored("Loop", Loop._colour)
    indent = 4 * " "

    correct = "".join([
        gsched, "[invoke='invoke_loop1', Constant loop bounds=True]\n",
        indent, "0: ", prof, "[]\n",
        2 * indent, sched, "[]\n",
        3 * indent, "0: ", prof, "[]\n",
        4 * indent, sched, "[]\n",
        5 * indent, "0: ", loop,
        "[type='outer', field_space='go_ct',",
        " it_space='go_internal_pts']\n"])
    assert correct in out

    correct2 = "".join([
        5 * indent, "1: ", prof, "[]\n",
        6 * indent, sched, "[]\n",
        7 * indent, "0: ", loop,
        "[type='outer', field_space='go_cu',",
        " it_space='go_all_pts']\n"])
    assert correct2 in out
from psyclone.psyir.transformations import TransformationError, ProfileTrans
from psyclone.psyir.nodes import IfBlock, CodeBlock, Schedule, \
    ArrayReference, Assignment, BinaryOperation, NaryOperation, Loop, \
    Literal, Return, Call
from psyclone.psyir.symbols import ScalarType
from psyclone.nemo import NemoInvokeSchedule, NemoKern, NemoLoop
from psyclone.errors import InternalError

# Which version of the NVIDIA (PGI) compiler we are targeting (different
# versions have different bugs that we have to work around).
PGI_VERSION = 1940  # i.e. 19.4

# Get the PSyclone transformations we will use
ACC_KERN_TRANS = TransInfo().get_trans_name('ACCKernelsTrans')
ACC_LOOP_TRANS = TransInfo().get_trans_name('ACCLoopTrans')
PROFILE_TRANS = ProfileTrans()

# Whether or not to automatically add profiling calls around
# un-accelerated regions
_AUTO_PROFILE = True

# If routine names contain these substrings then we do not profile them
PROFILING_IGNORE = ["_init", "_rst", "alloc", "agrif", "flo_dom",
                    "ice_thd_pnd", "macho", "mpp_", "nemo_gcm",