Beispiel #1
0
def test_profile_named_gocean1p0():
    '''Verify the PreStart call generated for the gocean 1.0 API when the
    caller passes an explicit region name to the profiling transformation.

    '''
    psy, invoke = get_invoke(
        "test11_different_iterates_over_one_invoke.f90", "gocean1.0", idx=0)
    # The region is named after the PSy-layer object and the invoke.
    region = (psy.name, invoke.name)
    ProfileTrans().apply(invoke.schedule.children,
                         options={"region_name": region})
    generated = str(invoke.gen())
    expected_call = ("CALL profile_psy_data%PreStart("
                     "\"psy_single_invoke_different_iterates_over\", "
                     "\"invoke_0\", 0, 0)")
    assert expected_call in generated
Beispiel #2
0
def test_transform_errors(capsys):
    '''Exercise the error handling of the profile region transformation.'''

    # get_invoke has been imported and tested elsewhere, so it is assumed
    # to work as expected here.
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)

    profile_trans = ProfileTrans()

    # Sanity check first: the three top-level nodes, listed in their
    # schedule order, are accepted by the transformation.
    top_nodes = invoke.schedule.children
    wrapped, _ = profile_trans.apply(
        [top_nodes[0], top_nodes[1], top_nodes[2]])
    wrapped.view()
    captured, _ = capsys.readouterr()
    # Convert the captured output to str and drop the newlines so that
    # the regular expression below can match across several nodes.
    flattened = str(captured).replace("\n", "")

    pattern = (".*GOInvokeSchedule.*?"
               r"Profile.*?"
               r"Loop.*\[type='outer'.*?"
               r"Loop.*\[type='outer'.*?"
               r"Loop.*\[type='outer'")
    assert re.search(pattern, flattened)

    # Start again from a fresh invoke to check that a profile node is
    # rejected inside an OMP do loop (which would be invalid syntax).
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)

    profile_trans = ProfileTrans()
    omp_loop_trans = GOceanOMPLoopTrans()

    # Put the first loop inside an OMP do directive.
    omp_loop_trans.apply(invoke.schedule[0])

    # The body of an omp do directive must be the loop itself, so a
    # profile node between the directive and the loop has to be refused.
    with pytest.raises(TransformationError) as excinfo:
        profile_trans.apply(invoke.schedule[0].dir_body[0])
    message = str(excinfo.value)
    assert ("A PSyData node cannot be inserted between an OpenMP/ACC "
            "directive and the loop(s) to which it applies!") in message

    # A region name that is a plain string rather than a tuple is refused.
    with pytest.raises(TransformationError) as excinfo:
        profile_trans.apply(invoke.schedule[0], {"region_name": "xx"})
    message = str(excinfo.value)
    assert ("Error in ProfileTrans. User-supplied region name must be a "
            "tuple containing two non-empty strings") in message
Beispiel #3
0
    def add_profile_nodes(schedule, loop_class):
        '''Add the profiling nodes requested through the Profiler options
        (kernel-level and/or invoke-level) to the supplied schedule.

        Invoke-level profiling is skipped, with a message on stderr, when
        the schedule contains more than one Return or when its single
        Return is not the last statement.

        :param schedule: The schedule to instrument.
        :type schedule: :py:class:`psyclone.psyGen.InvokeSchedule` or subclass
        :param loop_class: The loop class (e.g. GOLoop, DynLoop) to instrument.
        :type loop_class: :py:class:`psyclone.psyir.nodes.Loop` or subclass

        '''
        profile_trans = ProfileTrans()

        if Profiler.profile_kernels():
            for kern in schedule.walk(Kern):
                # Climb from the kernel through its ancestors of the
                # requested loop class. Only tightly-nested loops (a loop
                # body with exactly one child) are accepted, so the climb
                # stops at the first loop that has any other content.
                outermost = None
                candidate = kern.ancestor(loop_class)
                while candidate and len(candidate.loop_body.children) == 1:
                    outermost = candidate
                    candidate = candidate.ancestor(loop_class)

                # No suitable loop was found, or the loop sits inside an
                # OpenACC region (where the PSyData routines being called
                # would have to be compiled for the GPU): leave it alone.
                if not outermost or outermost.ancestor(ACCDirective):
                    continue

                # A directive whose body holds only this loop is a
                # directive applied to the loop (rather than one defining
                # a larger region), so the directive itself is enclosed
                # in the profiling region.
                enclosing = outermost.parent.parent
                if (isinstance(enclosing, Directive) and
                        len(enclosing.dir_body.children) == 1):
                    outermost = enclosing
                profile_trans.apply(outermost)

        if not Profiler.profile_invokes():
            return

        # Return statements cannot be included in a profiling region.
        return_stmts = schedule.walk(Return)
        if not return_stmts:
            profile_trans.apply(schedule.children)
        elif (len(return_stmts) == 1 and
              return_stmts[0] is schedule.children[-1]):
            # A single, trailing Return is simply left outside the region.
            profile_trans.apply(schedule.children[:-1])
        else:
            # TODO #11 use logging instead.
            print("Not adding profiling to routine '{0}' because it "
                  "contains one or more Return statements.".format(
                      schedule.name), file=sys.stderr)
Beispiel #4
0
def test_region():
    ''' Check the profiling transformation when it is applied to regions
    that cover only part of an invoke, that hold several nodes, and that
    are nested inside one another.

    '''
    def prestart(variable, region):
        '''Build the PreStart call expected for the given PSyData
        variable name and region name.'''
        return ("CALL {0}%PreStart(\"multi_functions_multi_invokes_psy\", "
                "\"{1}\", 0, 0)".format(variable, region))

    _, invoke = get_invoke("3.1_multi_functions_multi_invokes.f90",
                           "dynamo0.3", name="invoke_0", dist_mem=True)
    sched = invoke.schedule
    profile_trans = ProfileTrans()
    # First region: the leading four nodes of the schedule.
    profile_trans.apply(sched[0:4])
    # Second region: nodes 1 and 2 of the schedule as it is now.
    profile_trans.apply(sched[1:3])
    code = str(invoke.gen())
    assert prestart("profile_psy_data", "invoke_0:r0") in code
    assert prestart("profile_psy_data_1", "invoke_0:r1") in code

    # Nest further regions: one inside the second region and one around
    # the complete schedule.
    profile_trans.apply(sched[1].profile_body[1])
    profile_trans.apply(sched)
    code = str(invoke.gen())
    expected_calls = [
        prestart("profile_psy_data_3", "invoke_0:r0"),
        prestart("profile_psy_data", "invoke_0:r1"),
        prestart("profile_psy_data_1", "invoke_0:r2"),
        prestart("profile_psy_data_2", "invoke_0:testkern_code:r3"),
    ]
    for call in expected_calls:
        assert call in code
Beispiel #5
0
def test_omp_transform():
    '''Check the code generated when the profiling transformation is
    combined with OMP parallelisation.'''

    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    sched = invoke.schedule
    # The expected code below relies on constant loop bounds.
    sched._const_loop_bounds = True

    profile_trans = ProfileTrans()
    omp_loop_trans = GOceanOMPLoopTrans()
    omp_parallel_trans = OMPParallelTrans()

    # Wrap the first loop in omp do, then omp parallel, then a profile
    # region around the whole construct.
    omp_loop_trans.apply(sched[0])
    omp_parallel_trans.apply(sched[0])
    profile_trans.apply(sched[0])

    expected = (
        "      CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", "
        "\"invoke_loop1:bc_ssh_code:r0\", 0, 0)\n"
        "      !$omp parallel default(shared), private(i,j)\n"
        "      !$omp do schedule(static)\n"
        "      DO j=2,jstop\n"
        "        DO i=2,istop\n"
        "          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)\n"
        "        END DO\n"
        "      END DO\n"
        "      !$omp end do\n"
        "      !$omp end parallel\n"
        "      CALL profile_psy_data%PostEnd")
    generated = str(invoke.gen())
    assert expected in generated

    # Add a second profile region, this time between the omp parallel
    # and the omp do directives.
    profile_trans.apply(sched[0].profile_body[0].dir_body[0])

    generated = str(invoke.gen())

    expected = (
        "CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", "
        "\"invoke_loop1:bc_ssh_code:r0\", 0, 0)\n"
        "      !$omp parallel default(shared), private(i,j)\n"
        "      CALL profile_psy_data_1%PreStart(\"psy_test27_loop_swap\", "
        "\"invoke_loop1:bc_ssh_code:r1\", 0, 0)\n"
        "      !$omp do schedule(static)\n"
        "      DO j=2,jstop\n"
        "        DO i=2,istop\n"
        "          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)\n"
        "        END DO\n"
        "      END DO\n"
        "      !$omp end do\n"
        "      CALL profile_psy_data_1%PostEnd\n"
        "      !$omp end parallel\n"
        "      CALL profile_psy_data%PostEnd")

    assert expected in generated
Beispiel #6
0
def test_multi_prefix_profile(monkeypatch):
    ''' Check the generated code when two regions of the same invoke use
    different PSyData prefixes ('tool1' and the default one).

    '''
    _, invoke = get_invoke("3.1_multi_functions_multi_invokes.f90",
                           "dynamo0.3", name="invoke_0", dist_mem=True)
    sched = invoke.schedule
    profile_trans = ProfileTrans()
    # Restrict the recognised PSyData prefixes to the two used below.
    monkeypatch.setattr(Config.get(), "_valid_psy_data_prefixes",
                        ["profile", "tool1"])
    # First region (leading four nodes): explicit 'tool1' prefix.
    profile_trans.apply(sched[0:4], options={"prefix": "tool1"})
    # Second region (nodes 1 and 2): default prefix.
    profile_trans.apply(sched[1:3])
    code = str(invoke.gen())

    expected_fragments = (
        # Module use statements for both tools.
        ("      USE profile_psy_data_mod, ONLY: profile_PSyDataType\n"),
        ("      USE tool1_psy_data_mod, ONLY: tool1_PSyDataType"),
        # One PSyData variable declared per tool.
        ("      TYPE(profile_PSyDataType), target, save :: "
         "profile_psy_data\n"
         "      TYPE(tool1_PSyDataType), target, save :: tool1_psy_data"),
        # The 'tool1' region opens before the first is_dirty check.
        ("      ! Call kernels and communication routines\n"
         "      !\n"
         "      CALL tool1_psy_data%PreStart(\"multi_functions_multi_"
         "invokes_psy\", \"invoke_0:r0\", 0, 0)\n"
         "      IF (f1_proxy%is_dirty(depth=1)) THEN\n"),
        # The default-prefix region opens right after 'tool1' closes.
        ("      CALL tool1_psy_data%PostEnd\n"
         "      CALL profile_psy_data%PreStart(\"multi_functions_multi_"
         "invokes_psy\", \"invoke_0:r1\", 0, 0)\n"
         "      DO cell=1,mesh%get_last_halo_cell(1)"),
        # The default-prefix region closes before the following loop.
        ("      CALL f1_proxy%set_dirty()\n"
         "      !\n"
         "      CALL profile_psy_data%PostEnd\n"
         "      DO cell=1,mesh%get_last_halo_cell(1)"),
    )
    for fragment in expected_fragments:
        assert fragment in code
Beispiel #7
0
def trans(psy):
    '''PSyclone-script compliant transformation function that wraps all
    children of every invoke's schedule in a single profiling region.

    :param psy: The PSy layer object to apply transformations to.
    :type psy: :py:class:`psyclone.psyGen.PSy`
    '''
    invoke_names = "\n".join(map(str, psy.invokes.names))
    print("Invokes found:\n{0}\n".format(invoke_names))

    profiler = ProfileTrans()

    for invoke in psy.invokes.invoke_list:
        schedule = invoke.schedule
        if schedule:
            # One profile region around all top-level nodes, then show
            # the resulting tree.
            profiler.apply(schedule.children)
            schedule.view()
        else:
            print("Invoke {0} has no Schedule! Skipping...".format(
                invoke.name))
Beispiel #8
0
def trans(psy):
    '''
    Add OpenACC directives to the supplied psy object and then wrap the
    whole schedule of the 'invoke_0_inc_field' invoke in a single
    profiling region.

    :param psy: the PSy layer to transform.
    :type psy: :py:class:`psyclone.gocean1p0.GOPSy`

    :returns: the transformed PSy object.
    :rtype: :py:class:`psyclone.gocean1p0.GOPSy`

    '''
    profiler = ProfileTrans()

    # The OpenACC directives are added by the trans() routine in
    # acc_transform.py, which is available here as acc_trans.
    psy = acc_trans(psy)

    target_schedule = psy.invokes.get('invoke_0_inc_field').schedule

    # A single profiling region around every top-level node.
    profiler.apply(target_schedule.children)
    target_schedule.view()
    return psy
Beispiel #9
0
    def add_profile_nodes(schedule, loop_class):
        '''Add the profiling nodes requested through the Profiler options
        (kernel-level and/or invoke-level) to the supplied schedule.

        :param schedule: The schedule to instrument.
        :type schedule: :py:class:`psyclone.psyGen.InvokeSchedule` or
                        derived class
        :param loop_class: The loop class (e.g. GOLoop, DynLoop) to instrument.
        :type loop_class: :py:class:`psyclone.psyir.nodes.Loop` or
                          derived class.
        '''

        from psyclone.psyir.transformations import ProfileTrans
        transformation = ProfileTrans()
        if Profiler.profile_kernels():
            # Each top-level child that is an instance of the requested
            # loop class gets a profiling region of its own.
            for node in schedule.children:
                if not isinstance(node, loop_class):
                    continue
                transformation.apply(node)
        if Profiler.profile_invokes():
            # One region around everything at the top level of the schedule.
            transformation.apply(schedule.children)
Beispiel #10
0
def test_no_psydata_in_kernels(parser, monkeypatch):
    ''' Code generation must be refused when PSyData calls end up inside
    an OpenACC kernels region. '''
    parse_tree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    profile_trans = ProfileTrans()
    # Enclose the first node of the schedule in an ACC kernels region.
    ACCKernelsTrans().apply(sched[0])
    # The transformation itself must reject a profiling call placed on
    # the first assignment found in the schedule.
    first_assign = sched.walk(Assignment)[0]
    with pytest.raises(TransformationError) as err:
        profile_trans.apply(first_assign)
    message = str(err.value)
    assert ("A PSyData node cannot be inserted inside an OpenACC region"
            in message)
    # Replace validate() with a no-op so that apply() goes ahead without
    # the checking that validate() does.
    monkeypatch.setattr(profile_trans, "validate", lambda x, y: None)
    profile_trans.apply(first_assign)
    # The problem must now be caught at code-generation time instead.
    with pytest.raises(GenerationError) as err:
        _ = psy.gen
    message = str(err.value)
    assert ("Cannot include calls to PSyData routines within OpenACC "
            "regions" in message)
Beispiel #11
0
   representation of NEMO code to insert profiling calls.

'''

from __future__ import absolute_import, print_function
import re
import pytest
from fparser.common.readfortran import FortranStringReader
from psyclone.errors import InternalError
from psyclone.configuration import Config
from psyclone.psyGen import PSyFactory
from psyclone.psyir.nodes import PSyDataNode
from psyclone.psyir.transformations import ProfileTrans, TransformationError

# Module-level ProfileTrans instance: the single transformation object
# that most of the tests in this module use.
PTRANS = ProfileTrans()


@pytest.fixture(scope="module", autouse=True)
def setup():
    '''Module-scoped, automatically-used fixture: select the nemo API
    before the tests in this module run and discard the Config singleton
    once they have finished.'''

    Config.get().api = "nemo"
    # A bare yield hands control to the tests; no fixture value is needed
    # (the previous `yield()` yielded an empty tuple).
    yield
    # At the end of all tests make sure that we wipe the Config object
    # so we get a fresh/default one for any further test (and not a
    # left-over one from a test here).
    Config._instance = None

Beispiel #12
0
def test_profile_basic(capsys):
    '''Check basic functionality: node names, schedule view.

    The Profiler options are set at the start of the test and are reset
    in a ``finally`` clause, so that a failing assertion cannot leave
    invoke profiling switched on for whichever test runs next.
    '''
    Profiler.set_options([Profiler.INVOKES])
    try:
        _, invoke = get_invoke(
            "test11_different_iterates_over_one_invoke.f90",
            "gocean1.0",
            idx=0,
            dist_mem=False)
        # This test expects constant loop bounds
        invoke.schedule._const_loop_bounds = True
        Profiler.add_profile_nodes(invoke.schedule, Loop)

        assert isinstance(invoke.schedule[0], ProfileNode)

        invoke.schedule.view()
        out, _ = capsys.readouterr()

        gsched = colored("GOInvokeSchedule", GOInvokeSchedule._colour)
        sched = colored("Schedule", Schedule._colour)
        loop = Loop().coloured_name(True)
        profile = invoke.schedule[0].coloured_name(True)

        # Do one test based on schedule view, to make sure colouring
        # and indentation is correct
        expected = (gsched +
                    "[invoke='invoke_0', Constant loop bounds=True]\n"
                    "    0: " + profile + "[]\n"
                    "        " + sched + "[]\n"
                    "            0: " + loop +
                    "[type='outer', field_space='go_cv', "
                    "it_space='go_internal_pts']\n")
        assert expected in out

        prt = ProfileTrans()

        # Insert a profile call between outer and inner loop.
        # This tests that we find the subroutine node even
        # if it is not the immediate parent.
        new_sched, _ = prt.apply(
            invoke.schedule[0].profile_body[0].loop_body[0])

        new_sched_str = str(new_sched)
        correct = ("""GOInvokeSchedule[invoke='invoke_0', \
Constant loop bounds=True]:
ProfileStart[var=profile_psy_data]
GOLoop[id:'', variable:'j', loop_type:'outer']
Literal[value:'2', Scalar<INTEGER, UNDEFINED>]
Literal[value:'jstop-1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
ProfileStart[var=profile_psy_data_1]
GOLoop[id:'', variable:'i', loop_type:'inner']
Literal[value:'2', Scalar<INTEGER, UNDEFINED>]
Literal[value:'istop', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
kern call: compute_cv_code
End Schedule
End GOLoop
ProfileEnd
End Schedule
End GOLoop
GOLoop[id:'', variable:'j', loop_type:'outer']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'jstop+1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
GOLoop[id:'', variable:'i', loop_type:'inner']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'istop+1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
kern call: bc_ssh_code
End Schedule
End GOLoop
End Schedule
End GOLoop
ProfileEnd
End Schedule""")
        assert correct in new_sched_str
    finally:
        # Always restore the default (no automatic profiling), even if
        # one of the assertions above failed.
        Profiler.set_options(None)
Beispiel #13
0
def test_transform(capsys):
    '''Check the normal behaviour of the profile region transformation:
    wrapping a full child list, a single node, and a hand-built sublist.'''

    # pylint: disable=too-many-locals
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    # The expected output below relies on constant loop bounds.
    schedule._const_loop_bounds = True

    profile_trans = ProfileTrans()
    assert str(profile_trans) == ("Create a sub-tree of the PSyIR that has "
                                  "a node of type ProfileNode at its root.")
    assert profile_trans.name == "ProfileTrans"

    # Step 1: hand the complete list of children to the transformation.
    whole_wrapped, _ = profile_trans.apply(schedule.children)

    expected = ("""GOInvokeSchedule[invoke='invoke_loop1', \
Constant loop bounds=True]:
ProfileStart[var=profile_psy_data]
GOLoop[id:'', variable:'j', loop_type:'outer']
Literal[value:'2', Scalar<INTEGER, UNDEFINED>]
Literal[value:'jstop', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
GOLoop[id:'', variable:'i', loop_type:'inner']
Literal[value:'2', Scalar<INTEGER, UNDEFINED>]
Literal[value:'istop', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
kern call: bc_ssh_code
End Schedule
End GOLoop
End Schedule
End GOLoop
GOLoop[id:'', variable:'j', loop_type:'outer']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'jstop+1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
GOLoop[id:'', variable:'i', loop_type:'inner']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'istop', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
kern call: bc_solid_u_code
End Schedule
End GOLoop
End Schedule
End GOLoop
GOLoop[id:'', variable:'j', loop_type:'outer']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'jstop', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
GOLoop[id:'', variable:'i', loop_type:'inner']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'istop+1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
kern call: bc_solid_v_code
End Schedule
End GOLoop
End Schedule
End GOLoop
ProfileEnd
End Schedule""")
    assert expected in str(whole_wrapped)

    # Step 2: wrap one node only - the middle loop inside the region
    # created in step 1.
    middle_wrapped, _ = profile_trans.apply(schedule[0].profile_body[1])

    expected = ("""GOInvokeSchedule[invoke='invoke_loop1', \
Constant loop bounds=True]:
ProfileStart[var=profile_psy_data]
GOLoop[id:'', variable:'j', loop_type:'outer']
Literal[value:'2', Scalar<INTEGER, UNDEFINED>]
Literal[value:'jstop', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
GOLoop[id:'', variable:'i', loop_type:'inner']
Literal[value:'2', Scalar<INTEGER, UNDEFINED>]
Literal[value:'istop', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
kern call: bc_ssh_code
End Schedule
End GOLoop
End Schedule
End GOLoop
ProfileStart[var=profile_psy_data_1]
GOLoop[id:'', variable:'j', loop_type:'outer']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'jstop+1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
GOLoop[id:'', variable:'i', loop_type:'inner']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'istop', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
kern call: bc_solid_u_code
End Schedule
End GOLoop
End Schedule
End GOLoop
ProfileEnd
GOLoop[id:'', variable:'j', loop_type:'outer']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'jstop', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
GOLoop[id:'', variable:'i', loop_type:'inner']
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'istop+1', Scalar<INTEGER, UNDEFINED>]
Literal[value:'1', Scalar<INTEGER, UNDEFINED>]
Schedule:
kern call: bc_solid_v_code
End Schedule
End GOLoop
End Schedule
End GOLoop
ProfileEnd
End Schedule""")
    assert expected in str(middle_wrapped)

    # Step 3: a list assembled from individual elements (the first two
    # children of the outer region) can be wrapped as well.
    outer_body = middle_wrapped[0].profile_body
    sublist_wrapped, _ = profile_trans.apply([outer_body[0], outer_body[1]])
    sublist_wrapped.view()
    view_text, _ = capsys.readouterr()

    gsched_name = colored("GOInvokeSchedule", GOInvokeSchedule._colour)
    profile_name = colored("Profile", ProfileNode._colour)
    sched_name = colored("Schedule", Schedule._colour)
    loop_name = colored("Loop", Loop._colour)

    pad = 4 * " "
    expected_head = "".join([
        gsched_name, "[invoke='invoke_loop1', Constant loop bounds=True]\n",
        pad, "0: ", profile_name, "[]\n",
        2 * pad, sched_name, "[]\n",
        3 * pad, "0: ", profile_name, "[]\n",
        4 * pad, sched_name, "[]\n",
        5 * pad, "0: ", loop_name,
        "[type='outer', field_space='go_ct',"
        " it_space='go_internal_pts']\n",
    ])
    assert expected_head in view_text
    expected_nested = "".join([
        5 * pad, "1: ", profile_name, "[]\n",
        6 * pad, sched_name, "[]\n",
        7 * pad, "0: ", loop_name,
        "[type='outer', field_space='go_cu',"
        " it_space='go_all_pts']\n",
    ])
    assert expected_nested in view_text
Beispiel #14
0
from psyclone.psyir.transformations import TransformationError, ProfileTrans
from psyclone.psyir.nodes import IfBlock, CodeBlock, Schedule, \
    ArrayReference, Assignment, BinaryOperation, NaryOperation, Loop, \
    Literal, Return, Call
from psyclone.psyir.symbols import ScalarType
from psyclone.nemo import NemoInvokeSchedule, NemoKern, NemoLoop
from psyclone.errors import InternalError

# Which version of the NVIDIA (PGI) compiler we are targeting (different
# versions have different bugs that we have to work around).
PGI_VERSION = 1940  # i.e. 19.4

# Get the PSyclone transformations we will use: the two OpenACC ones are
# looked up by name through TransInfo, the profiling one is created
# directly.
ACC_KERN_TRANS = TransInfo().get_trans_name('ACCKernelsTrans')
ACC_LOOP_TRANS = TransInfo().get_trans_name('ACCLoopTrans')
PROFILE_TRANS = ProfileTrans()

# Whether or not to automatically add profiling calls around
# un-accelerated regions.
_AUTO_PROFILE = True
# If routine names contain these substrings then we do not profile them
PROFILING_IGNORE = [
    "_init",
    "_rst",
    "alloc",
    "agrif",
    "flo_dom",
    "ice_thd_pnd",
    "macho",
    "mpp_",
    "nemo_gcm",