Esempio n. 1
0
def test_no_parent_accdirective():
    ''' Test that applying Extract Transformation on an orphaned
    ACCLoopDirective without its ancestor ACCParallelDirective
    when optimisations are applied raises a TransformationError. '''

    etrans = GOceanExtractTrans()
    acclpt = ACCLoopTrans()
    accpara = ACCParallelTrans()
    accdata = ACCEnterDataTrans()

    _, invoke = get_invoke("single_invoke_three_kernels.f90",
                           GOCEAN_API,
                           idx=0,
                           dist_mem=False)
    schedule = invoke.schedule

    # Apply the OpenACC Loop transformation to every loop in the Schedule
    for child in schedule.children:
        if isinstance(child, Loop):
            acclpt.apply(child)
    # Enclose all of these loops within a single ACC Parallel region
    accpara.apply(schedule.children)
    # Add a mandatory ACC enter-data directive
    accdata.apply(schedule)

    orphaned_directive = schedule.children[1].children[0]
    with pytest.raises(TransformationError) as excinfo:
        _, _ = etrans.apply(orphaned_directive)
    assert "Error in GOceanExtractTrans: Application to Nodes enclosed " \
           "within a thread-parallel region is not allowed." \
           in str(excinfo.value)
Esempio n. 2
0
def trans(psy):
    ''' Take the supplied psy object, apply OpenACC transformations
    to the schedule of invoke_0 and return the new psy object '''
    ptrans = ACCParallelTrans()
    ltrans = ACCLoopTrans()
    dtrans = ACCEnterDataTrans()
    ktrans = ACCRoutineTrans()

    invoke = psy.invokes.get('invoke_0_inc_field')
    schedule = invoke.schedule
    schedule.view()

    # Apply the OpenACC Loop transformation to *every* loop
    # nest in the schedule
    for child in schedule.children:
        if isinstance(child, Loop):
            ltrans.apply(child, {"collapse": 2})

    # Put all of the loops in a single parallel region
    ptrans.apply(schedule.children)

    # Add an enter-data directive
    dtrans.apply(schedule)

    # Put an 'acc routine' directive inside each kernel
    for kern in schedule.coded_kernels():
        ktrans.apply(kern)
        # Ideally we would module-inline the kernel here (to save having to
        # rely on the compiler to do it) but this does not currently work
        # for the fparser2 AST (issue #229).
        # _, _ = itrans.apply(kern)

    schedule.view()
    return psy
Esempio n. 3
0
def collapse_loops(nodes):
    '''
    Searches the supplied list of nodes and applies an ACC LOOP COLLAPSE(2)
    directive to any perfectly-nested lon-lat loops.

    :param nodes: list of nodes to search for loops.
    :type nodes: list of :py:class:`psyclone.psyir.nodes.Node`

    '''
    loop_trans = ACCLoopTrans()

    for node in nodes:
        loops = node.walk(Loop)
        for loop in loops:
            if loop.ancestor(ACCLoopDirective):
                # We've already transformed a parent Loop so skip this one.
                continue
            loop_options = {}
            # We put a COLLAPSE(2) clause on any perfectly-nested lon-lat
            # loops that have a Literal value for their step. The latter
            # condition is necessary to avoid compiler errors with 20.7.
            if loop.loop_type == "lat" and \
               isinstance(loop.step_expr, Literal) and \
               isinstance(loop.loop_body[0], Loop) and \
               loop.loop_body[0].loop_type == "lon" and \
               isinstance(loop.loop_body[0].step_expr, Literal) and \
               len(loop.loop_body.children) == 1:
                loop_options["collapse"] = 2
            if loop_options:
                loop_trans.apply(loop, loop_options)
Esempio n. 4
0
def test_no_parent_accdirective():
    ''' Test that applying Extract Transformation on an orphaned
    ACCLoopDirective without its ancestor ACCParallelDirective
    when optimisations are applied raises a TransformationError. '''
    from psyclone.transformations import ACCParallelTrans, ACCEnterDataTrans, \
        ACCLoopTrans

    etrans = GOceanExtractRegionTrans()
    acclpt = ACCLoopTrans()
    accpara = ACCParallelTrans()
    accdata = ACCEnterDataTrans()

    _, invoke_info = parse(os.path.join(GOCEAN_BASE_PATH,
                                        "single_invoke_three_kernels.f90"),
                           api=GOCEAN_API)
    psy = PSyFactory(GOCEAN_API, distributed_memory=False).create(invoke_info)
    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule

    # Apply the OpenACC Loop transformation to every loop in the Schedule
    for child in schedule.children:
        if isinstance(child, Loop):
            schedule, _ = acclpt.apply(child)
    # Enclose all of these loops within a single ACC Parallel region
    schedule, _ = accpara.apply(schedule.children)
    # Add a mandatory ACC enter-data directive
    schedule, _ = accdata.apply(schedule)

    orphaned_directive = schedule.children[1].children[0]
    with pytest.raises(TransformationError) as excinfo:
        _, _ = etrans.apply(orphaned_directive)
    assert ("Extraction of Nodes enclosed within a thread parallel "
            "region is not allowed.") in str(excinfo)
Esempio n. 5
0
def test_loop_after_implicit_kernels(parser):
    ''' Test the addition of a loop directive after some implicit loops
    within a kernels region. '''
    reader = FortranStringReader("program two_loops\n"
                                 "  integer :: ji\n"
                                 "  real :: array(10,10)\n"
                                 "  array(:,:) = -1.0\n"
                                 "  do ji = 1, 5\n"
                                 "    array(ji,1) = 2.0*array(ji,2)\n"
                                 "  end do\n"
                                 "end program two_loops\n")
    code = parser(reader)
    psy = PSyFactory(API, distributed_memory=False).create(code)
    schedule = psy.invokes.invoke_list[0].schedule
    acc_trans = ACCKernelsTrans()
    loop_trans = ACCLoopTrans()
    acc_trans.apply(schedule[0:2])
    loop_trans.apply(schedule[0].dir_body[1])
    output = str(psy.gen).lower()
    assert ("  !$acc kernels\n"
            "  array(:, :) = - 1.0\n"
            "  !$acc loop independent\n"
            "  do ji = 1, 5\n" in output)
    assert ("  end do\n"
            "  !$acc end kernels\n"
            "end program" in output)
Esempio n. 6
0
def test_accloop():
    ''' Generic tests for the ACCLoopTrans transformation class '''
    trans = ACCLoopTrans()
    assert trans.name == "ACCLoopTrans"
    assert str(trans) == "Adds an 'OpenACC loop' directive to a loop"

    cnode = Statement()
    tdir = trans._directive([cnode])
    assert isinstance(tdir, ACCLoopDirective)
Esempio n. 7
0
def test_accloop():
    ''' Generic tests for the ACCLoopTrans transformation class '''
    from psyclone.transformations import ACCLoopTrans
    from psyclone.psyGen import ACCLoopDirective
    trans = ACCLoopTrans()
    assert trans.name == "ACCLoopTrans"
    assert str(trans) == "Adds an 'OpenACC loop' directive to a loop"

    pnode = Node()
    cnode = Statement()
    tdir = trans._directive(pnode, [cnode])
    assert isinstance(tdir, ACCLoopDirective)
Esempio n. 8
0
def test_loop_inside_kernels(parser):
    ''' Check that we can put an ACC LOOP directive inside a KERNELS
    region. '''
    code = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(code)
    schedule = psy.invokes.invoke_list[0].schedule
    acc_trans = ACCKernelsTrans()
    acc_trans.apply([schedule[0]])
    loop_trans = ACCLoopTrans()
    loop_trans.apply(schedule[0].dir_body[0])
    output = str(psy.gen).lower()
    assert ("  !$acc kernels\n"
            "  !$acc loop independent\n"
            "  do ji = 1, jpj\n" in output)
    assert ("  end do\n" "  !$acc end kernels\n" in output)
Esempio n. 9
0
def trans(psy):
    ''' Take the supplied psy object, apply OpenACC transformations
    to the schedule of the first invoke and return the new psy object '''
    from psyclone.transformations import ACCParallelTrans, \
        ACCEnterDataTrans, ACCLoopTrans, ACCRoutineTrans, \
        KernelGlobalsToArguments
    ptrans = ACCParallelTrans()
    ltrans = ACCLoopTrans()
    dtrans = ACCEnterDataTrans()
    ktrans = ACCRoutineTrans()
    g2localtrans = KernelGlobalsToArguments()

    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule
    schedule.view()

    # Apply the OpenACC Loop transformation to *every* loop
    # nest in the schedule
    from psyclone.psyir.nodes import Loop
    for child in schedule.children:
        if isinstance(child, Loop):
            newschedule, _ = ltrans.apply(child, {"collapse": 2})
            schedule = newschedule

    # Put all of the loops in a single parallel region
    newschedule, _ = ptrans.apply(schedule.children)

    # Add an enter-data directive
    newschedule, _ = dtrans.apply(schedule)

    # Convert any accesses to global data into kernel arguments and then
    # put an 'acc routine' directive inside each kernel
    for kern in schedule.coded_kernels():
        if kern.name == "kern_use_var_code":
            # TODO #490 and #663. This currently won't work because the
            # KernelGlobalsToArguments transformation works on the PSyIR but
            # the subsequent ACCRoutineTrans works on the fparser2 parse tree.
            g2localtrans.apply(kern)
        _, _ = ktrans.apply(kern)
        # Ideally we would module-inline the kernel here (to save having to
        # rely on the compiler to do it) but this does not currently work
        # for the fparser2 AST (issue #229).
        # _, _ = itrans.apply(kern)

    invoke.schedule = newschedule
    newschedule.view()
    return psy
Esempio n. 10
0
def trans(psy):
    '''PSyclone transformation script for the dynamo0p3 api to apply
    OpenACC loop, parallel and enter data directives generically.

    :param psy: a PSyclone PSy object which captures the algorithm and \
        kernel information required by PSyclone.
    :type psy: subclass of :py:class:`psyclone.psyGen.PSy`

    '''
    kernels_trans = ACCKernelsTrans()
    routine_trans = ACCRoutineTrans()
    ctrans = Dynamo0p3ColourTrans()
    loop_trans = ACCLoopTrans()
    enter_trans = ACCEnterDataTrans()
    const = LFRicConstants()

    # Loop over all of the Invokes in the PSy object
    for invoke in psy.invokes.invoke_list:

        schedule = invoke.schedule

        # Colour loops as required
        for loop in schedule.loops():
            if loop.field_space.orig_name \
               not in const.VALID_DISCONTINUOUS_NAMES \
               and loop.iteration_space == "cell_column":
                ctrans.apply(loop)

        # Add Kernels and Loop directives
        for loop in schedule.loops():
            if loop.loop_type != "colours":
                kernels_trans.apply([loop])
                loop_trans.apply(loop)

        # Add Routine directive to kernels
        for kernel in schedule.coded_kernels():
            routine_trans.apply(kernel)

        # Add Enter Data directive covering all of the PSy layer.
        enter_trans.apply(schedule)

        schedule.view()

    return psy
Esempio n. 11
0
def trans(psy):
    ''' Take the supplied psy object, apply OpenACC transformations
    to the schedule of the first invoke and return the new psy object '''
    from psyclone.transformations import ACCParallelTrans, \
        ACCEnterDataTrans, ACCLoopTrans, ACCRoutineTrans, \
        KernelGlobalsToArguments, KernelModuleInlineTrans
    ptrans = ACCParallelTrans()
    ltrans = ACCLoopTrans()
    dtrans = ACCEnterDataTrans()
    ktrans = ACCRoutineTrans()
    itrans = KernelModuleInlineTrans()
    g2localtrans = KernelGlobalsToArguments()

    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule
    schedule.view()

    # Apply the OpenACC Loop transformation to *every* loop
    # nest in the schedule
    from psyclone.psyir.nodes import Loop
    for child in schedule.children:
        if isinstance(child, Loop):
            newschedule, _ = ltrans.apply(child, {"collapse": 2})
            schedule = newschedule

    # Put all of the loops in a single parallel region
    newschedule, _ = ptrans.apply(schedule.children)

    # Add an enter-data directive
    newschedule, _ = dtrans.apply(schedule)

    # Convert any accesses to global data into kernel arguments, put an
    # 'acc routine' directive inside, and module-inline each kernel
    for kern in schedule.coded_kernels():
        if kern.name == "kern_use_var_code":
            g2localtrans.apply(kern)
        _, _ = ktrans.apply(kern)
        _, _ = itrans.apply(kern)

    invoke.schedule = newschedule
    newschedule.view()
    return psy
Esempio n. 12
0
def trans(psy):
    '''PSyclone transformation script for the dynamo0p3 api to apply
    OpenACC loop, parallel and enter data directives generically.

    '''
    loop_trans = ACCLoopTrans()
    parallel_trans = ACCParallelTrans()
    enter_data_trans = ACCEnterDataTrans()

    # Loop over all of the Invokes in the PSy object
    for invoke in psy.invokes.invoke_list:

        print("Transforming invoke '" + invoke.name + "'...")
        schedule = invoke.schedule
        for loop in schedule.loops():
            _, _ = loop_trans.apply(loop)
            _, _ = parallel_trans.apply(loop.parent)
        _, _ = enter_data_trans.apply(schedule)

    return psy
Esempio n. 13
0
def trans(psy):
    ''' Take the supplied psy object, apply OpenACC transformations
    to the schedule of invoke_0 and return the new psy object '''
    from psyclone.transformations import ACCParallelTrans, \
        ACCDataTrans, ACCLoopTrans, ACCRoutineTrans, KernelModuleInlineTrans
    ptrans = ACCParallelTrans()
    ltrans = ACCLoopTrans()
    dtrans = ACCDataTrans()
    ktrans = ACCRoutineTrans()
    itrans = KernelModuleInlineTrans()

    invoke = psy.invokes.get('invoke_0_inc_field')
    schedule = invoke.schedule
    # schedule.view()

    # Apply the OpenACC Loop transformation to *every* loop
    # nest in the schedule
    from psyclone.psyGen import Loop
    for child in schedule.children:
        if isinstance(child, Loop):
            newschedule, _ = ltrans.apply(child, collapse=2)
            schedule = newschedule

    # Put all of the loops in a single parallel region
    newschedule, _ = ptrans.apply(schedule.children)

    # Add an enter-data directive
    newschedule, _ = dtrans.apply(schedule)

    # Put an 'acc routine' directive inside each kernel
    for kern in schedule.kern_calls():
        _, _ = ktrans.apply(kern)
        # Ideally we would module-inline the kernel here (to save having to
        # rely on the compiler to do it) but this does not currently work
        # for the fparser2 AST (issue #229).
        # _, _ = itrans.apply(kern)

    invoke.schedule = newschedule
    newschedule.view()
    return psy
Esempio n. 14
0
def test_two_loops_inside_kernels(parser):
    ''' Check that we can mark-up one or both loops inside a KERNELS
    region containing two loops. '''
    reader = FortranStringReader("program two_loops\n"
                                 "  integer :: ji\n"
                                 "  real :: array(10)\n"
                                 "  do ji = 1, 10\n"
                                 "    array(ji) = 1.0\n"
                                 "  end do\n"
                                 "  do ji = 1, 5\n"
                                 "    array(ji) = 2.0*array(ji)\n"
                                 "  end do\n"
                                 "end program two_loops\n")
    code = parser(reader)
    psy = PSyFactory(API, distributed_memory=False).create(code)
    schedule = psy.invokes.invoke_list[0].schedule
    # Enclose both loops within a KERNELS region
    acc_trans = ACCKernelsTrans()
    acc_trans.apply(schedule[0:2])
    # Apply a loop transformation to just the first loop
    loop_trans = ACCLoopTrans()
    loop_trans.apply(schedule[0].dir_body[0])
    output = str(psy.gen).lower()
    assert ("  !$acc kernels\n"
            "  !$acc loop independent\n"
            "  do ji = 1, 10\n" in output)
    assert ("  end do\n" "  !$acc end kernels\n" "end program" in output)
    loop_trans.apply(schedule[0].dir_body[1])
    output = str(psy.gen).lower()
    assert ("  !$acc loop independent\n" "  do ji = 1, 5\n" in output)
    assert ("  end do\n" "  !$acc end kernels\n" "end program" in output)