def trans(psy):
    ''' Python script intended to be passed to PSyclone's generate()
    function via the -s option. Applies OpenMP to every loop before
    enclosing them all within a single OpenMP PARALLEL region. '''

    from psyclone.psyGen import TransInfo
    trans_registry = TransInfo()
    omp_loop = trans_registry.get_trans_name('GOceanOMPLoopTrans')
    omp_region = trans_registry.get_trans_name('OMPParallelTrans')

    sched = psy.invokes.get('invoke_0').schedule
    # sched.view()

    # Wrap every top-level node of the schedule in an OpenMP loop
    # directive. Each apply() hands back the updated schedule, which
    # we simply carry forward.
    for node in sched.children:
        sched, _ = omp_loop.apply(node)

    # Now enclose all of those loops within one OpenMP PARALLEL region.
    sched, _ = omp_region.apply(sched.children)

    psy.invokes.get('invoke_0').schedule = sched
    return psy
# Esempio n. 2 (Example 2)
# 0
def trans(psy):
    ''' Transformation entry point '''
    config = Config.get()
    tinfo = TransInfo()
    omp_parallel_do = tinfo.get_trans_name('GOceanOMPParallelLoopTrans')
    omp_do = tinfo.get_trans_name('GOceanOMPLoopTrans')
    omp_parallel = tinfo.get_trans_name('OMPParallelTrans')
    module_inline = tinfo.get_trans_name('KernelModuleInline')

    sched = psy.invokes.get('invoke_0').schedule

    # Module-inline every kernel in this Schedule.
    for kern in sched.kernels():
        module_inline.apply(kern)

    # Without distributed memory every child of the schedule gets an
    # OpenMP loop directive; with distributed memory only Loop nodes
    # get a combined OpenMP PARALLEL DO.
    for node in sched.children:
        if not config.distributed_memory:
            omp_do.apply(node)
        elif isinstance(node, Loop):
            omp_parallel_do.apply(node)

    if not config.distributed_memory:
        # Without distributed memory, enclose all of these loops
        # within a single OpenMP PARALLEL region.
        omp_parallel.apply(sched.children)

    return psy
# Esempio n. 3 (Example 3)
# 0
def trans(psy):
    ''' Take the supplied psy object, apply OpenACC transformations
    to the schedule of invoke_0 and return the new psy object '''
    tinfo = TransInfo()
    acc_parallel = tinfo.get_trans_name('ACCParallelTrans')
    acc_loop = tinfo.get_trans_name('ACCLoopTrans')
    acc_enter_data = tinfo.get_trans_name('ACCEnterDataTrans')
    acc_routine = tinfo.get_trans_name('ACCRoutineTrans')
    imports_to_args = tinfo.get_trans_name('KernelImportsToArguments')
    module_inline = tinfo.get_trans_name('KernelModuleInline')

    sched = psy.invokes.get('invoke_0').schedule

    # Decorate every Loop in the schedule with an ACC LOOP directive,
    # collapsing the two outermost loops.
    for node in sched.children:
        if isinstance(node, Loop):
            acc_loop.apply(node, {"collapse": 2})

    # A single ACC parallel region around the whole schedule.
    acc_parallel.apply(sched)

    # Add an enter-data directive.
    acc_enter_data.apply(sched)

    # Mark each kernel with ACC ROUTINE; any imported (global)
    # variables must be converted to arguments first. Finally the
    # kernel is module-inlined.
    for kern in sched.coded_kernels():
        imports_to_args.apply(kern)
        acc_routine.apply(kern)
        module_inline.apply(kern)

    return psy
# Esempio n. 4 (Example 4)
# 0
def trans(psy):
    '''
    Transformation routine for use with PSyclone. Converts any global-variable
    accesses into kernel arguments and then applies the OpenCL transformation
    to the PSy layer.

    :param psy: the PSy object which this script will transform.
    :type psy: :py:class:`psyclone.psyGen.PSy`
    :returns: the transformed PSy object.
    :rtype: :py:class:`psyclone.psyGen.PSy`

    '''

    # Set up the transformations we are going to use.
    tinfo = TransInfo()
    globals_to_args = tinfo.get_trans_name('KernelGlobalsToArguments')
    move_boundaries = GOMoveIterationBoundariesInsideKernelTrans()
    to_opencl = tinfo.get_trans_name('OCLTrans')

    for invoke in psy.invokes.invoke_list:
        print("Converting to OpenCL invoke: " + invoke.name)
        sched = invoke.schedule

        # Skip invoke_2 as its time_smooth_code kernel contains a
        # module variable (alpha) which is not dealt with by the
        # KernelGlobalsToArguments transformation, see issue #826.
        if invoke.name == "invoke_2":
            continue

        # Push the PSy-layer loop boundaries into each kernel (as a
        # mask) and strip the globals out of the kernel.
        for kern in sched.kernels():
            print("Update kernel: " + kern.name)
            move_boundaries.apply(kern)
            globals_to_args.apply(kern)

        # Finally convert the whole invoke to OpenCL.
        to_opencl.apply(sched)

    return psy
# Esempio n. 5 (Example 5)
# 0
def trans(psy):
    ''' Transformation script entry function '''

    inline = TransInfo().get_trans_name('KernelModuleInline')

    sched = psy.invokes.get('invoke_0').schedule

    # Module-inline every coded kernel found in this Schedule.
    for kern in sched.coded_kernels():
        inline.apply(kern)

    return psy
# Esempio n. 6 (Example 6)
# 0
# Example script: build the PSy layer for a dynamo0.1 algorithm, fuse the
# first two loops of 'invoke_0' and parallelise the fused loop with OpenMP.
from __future__ import print_function
from psyclone.parse.algorithm import parse
from psyclone.psyGen import PSyFactory
from psyclone.psyGen import TransInfo
API = "dynamo0.1"
# parse() returns (ast, invoke_info); the raw AST is not needed here.
_, INVOKEINFO = parse("dynamo_algorithm_mod.F90", api=API)
PSY = PSyFactory(API).create(INVOKEINFO)
print(PSY.gen)

print(PSY.invokes.names)

TRANS = TransInfo()
print(TRANS.list)

LOOP_FUSE = TRANS.get_trans_name('LoopFuse')
OMP_PAR = TRANS.get_trans_name('OMPParallelLoopTrans')

SCHEDULE = PSY.invokes.get('invoke_0').schedule
SCHEDULE.view()

# Fuse the first two loops, then wrap the fused loop in an OMP PARALLEL DO.
# NOTE(review): this uses the old transformation API in which apply()
# returns a (new_schedule, memento) pair.
FUSE_SCHEDULE, _ = LOOP_FUSE.apply(SCHEDULE.children[0], SCHEDULE.children[1])
FUSE_SCHEDULE.view()
OMP_SCHEDULE, _ = OMP_PAR.apply(FUSE_SCHEDULE.children[0])
OMP_SCHEDULE.view()

# Write the transformed schedule back into the PSy object.
PSY.invokes.get('invoke_0').schedule = OMP_SCHEDULE

# The second invoke is only displayed, not transformed.
SCHEDULE = PSY.invokes.get('invoke_1_v2_kernel_type').schedule
SCHEDULE.view()
# Esempio n. 7 (Example 7)
# 0
if __name__ == "__main__":
    # Example: build the PSy layer for a gocean1.0 algorithm (without
    # distributed memory) and fuse its outer loops in preparation for
    # OpenACC transformations.
    from psyclone.parse.algorithm import parse
    from psyclone.psyGen import PSyFactory, TransInfo

    API = "gocean1.0"
    # parse() returns (ast, invoke_info); the AST is unused here.
    _, INVOKEINFO = parse("shallow_alg.f90", api=API)
    PSY = PSyFactory(API, distributed_memory=False).create(INVOKEINFO)
    print(PSY.gen)

    print(PSY.invokes.names)
    SCHEDULE = PSY.invokes.get('invoke_0').schedule
    SCHEDULE.view()

    TRANS_INFO = TransInfo()
    print(TRANS_INFO.list)
    FUSE_TRANS = TRANS_INFO.get_trans_name('LoopFuse')
    # NOTE(review): the ACC transformations below are fetched but never
    # applied in this (truncated) excerpt.
    PTRANS = TRANS_INFO.get_trans_name('ACCParallelTrans')
    DTRANS = TRANS_INFO.get_trans_name('ACCEnterDataTrans')
    LTRANS = TRANS_INFO.get_trans_name('ACCLoopTrans')

    # invoke0
    # fuse all outer loops
    # Each fusion merges the first two children of the current schedule;
    # old-style API: apply() returns (new_schedule, memento).
    LF1_SCHEDULE, _ = FUSE_TRANS.apply(SCHEDULE.children[0],
                                       SCHEDULE.children[1])
    LF2_SCHEDULE, _ = FUSE_TRANS.apply(LF1_SCHEDULE.children[0],
                                       LF1_SCHEDULE.children[1])
    LF3_SCHEDULE, _ = FUSE_TRANS.apply(LF2_SCHEDULE.children[0],
                                       LF2_SCHEDULE.children[1])
    LF3_SCHEDULE.view()

    # fuse all inner loops
# Esempio n. 8 (Example 8)
# 0
# Example: fuse all outer and then all inner loops of invoke_0 of a
# gocean1.0 algorithm.
from psyclone.parse import parse
from psyclone.psyGen import PSyFactory, TransInfo

API = "gocean1.0"
# parse() returns (ast, invoke_info); the AST is unused here.
_, INVOKEINFO = parse("shallow_alg.f90", api=API)
PSY = PSyFactory(API).create(INVOKEINFO)
# Print the vanilla, generated Fortran.
# Parenthesised print calls behave identically on Python 2 for a
# single argument and are required on Python 3 - this makes the script
# consistent with the other examples, which use print().
print(PSY.gen)

print(PSY.invokes.names)
SCHEDULE = PSY.invokes.get('invoke_0').schedule
SCHEDULE.view()

TRANS_INFO = TransInfo()
print(TRANS_INFO.list)
FUSE_TRANS = TRANS_INFO.get_trans_name('LoopFuse')

# fuse all outer loops (old-style API: apply() returns a
# (new_schedule, memento) pair)
LF1_SCHED, _ = FUSE_TRANS.apply(SCHEDULE.children[0], SCHEDULE.children[1])
LF2_SCHED, _ = FUSE_TRANS.apply(LF1_SCHED.children[0], LF1_SCHED.children[1])
LF3_SCHED, _ = FUSE_TRANS.apply(LF2_SCHED.children[0], LF2_SCHED.children[1])
LF3_SCHED.view()

# fuse all inner loops
LF4_SCHED, _ = FUSE_TRANS.apply(LF3_SCHED.children[0].children[0],
                                LF3_SCHED.children[0].children[1])
LF5_SCHED, _ = FUSE_TRANS.apply(LF4_SCHED.children[0].children[0],
                                LF4_SCHED.children[0].children[1])
LF6_SCHED, _ = FUSE_TRANS.apply(LF5_SCHED.children[0].children[0],
                                LF5_SCHED.children[0].children[1])
LF6_SCHED.view()
# Esempio n. 9 (Example 9)
# 0
# Example: apply OpenMP PARALLEL DO to the "levels" loops of a NEMO routine.
from psyclone.parse import parse
from psyclone.psyGen import PSyFactory, TransInfo

if __name__ == "__main__":
    from psyclone.nemo import NemoKern
    API = "nemo"
    # parse() returns (ast, invoke_info); the AST is unused here.
    _, INVOKEINFO = parse("tra_adv.F90", api=API)
    PSY = PSyFactory(API).create(INVOKEINFO)
    print(PSY.gen)

    print("Invokes found:")
    print(PSY.invokes.names)

    SCHED = PSY.invokes.get('tra_adv').schedule
    SCHED.view()

    TRANS_INFO = TransInfo()
    OMP_TRANS = TRANS_INFO.get_trans_name('OMPParallelLoopTrans')

    # Parallelise every "levels" loop that actually contains a kernel.
    for loop in SCHED.loops():
        # TODO loop.kernel method needs extending to cope with
        # multiple kernels
        # NOTE(review): old walk() signature taking (children, node_type).
        kernels = loop.walk(loop.children, NemoKern)
        if kernels and loop.loop_type == "levels":
            SCHED, _ = OMP_TRANS.apply(loop)

    SCHED.view()

    # Store the transformed schedule and emit the final Fortran.
    PSY.invokes.get('tra_adv').schedule = SCHED
    print(PSY.gen)
# Esempio n. 10 (Example 10)
# 0
# Example: fuse the outer (and then inner) loops of invoke_0 of a
# gocean1.0 algorithm prior to OpenMP parallelisation.
# NOTE(review): this example is truncated by the source it was scraped
# from - the final fusion statement is cut off mid-call.
from psyclone.parse import parse
from psyclone.psyGen import PSyFactory, TransInfo

API = "gocean1.0"
# parse() returns (ast, invoke_info); the AST is unused here.
_, INVOKEINFO = parse("shallow_alg.f90", api=API)
PSY = PSyFactory(API).create(INVOKEINFO)
# Parenthesised print calls behave identically on Python 2 for a
# single argument and are required on Python 3 - this makes the script
# consistent with the other examples, which use print().
print(PSY.gen)

print(PSY.invokes.names)
SCHEDULE = PSY.invokes.get('invoke_0').schedule
SCHEDULE.view()

TRANS_INFO = TransInfo()
print(TRANS_INFO.list)
FUSE_TRANS = TRANS_INFO.get_trans_name('LoopFuse')
OMP_TRANS = TRANS_INFO.get_trans_name('GOceanOMPParallelLoopTrans')

# invoke0
# fuse all outer loops (old-style API: apply() returns a
# (new_schedule, memento) pair)
LF1_SCHEDULE, _ = FUSE_TRANS.apply(SCHEDULE.children[0], SCHEDULE.children[1])
LF2_SCHEDULE, _ = FUSE_TRANS.apply(LF1_SCHEDULE.children[0],
                                   LF1_SCHEDULE.children[1])
LF3_SCHEDULE, _ = FUSE_TRANS.apply(LF2_SCHEDULE.children[0],
                                   LF2_SCHEDULE.children[1])
LF3_SCHEDULE.view()

# fuse all inner loops
LF4_SCHEDULE, _ = FUSE_TRANS.apply(LF3_SCHEDULE.children[0].children[0],
                                   LF3_SCHEDULE.children[0].children[1])
LF5_SCHEDULE, _ = FUSE_TRANS.apply(LF4_SCHEDULE.children[0].children[0],
# Esempio n. 11 (Example 11)
# 0
if __name__ == "__main__":
    # Example: fuse the outer loops of invoke_0 of a gocean1.0 algorithm
    # in preparation for OpenACC transformations.
    from psyclone.parse import parse
    from psyclone.psyGen import PSyFactory, TransInfo

    api = "gocean1.0"
    # parse() returns (ast, invoke_info); the AST is unused here.
    _, invokeinfo = parse("shallow_alg.f90", api=api)
    psy = PSyFactory(api).create(invokeinfo)
    print(psy.gen)

    print(psy.invokes.names)
    schedule = psy.invokes.get('invoke_0').schedule
    schedule.view()

    trans_info = TransInfo()
    print(trans_info.list)
    fuse_trans = trans_info.get_trans_name('LoopFuse')
    # NOTE(review): the ACC transformations below are fetched but never
    # applied in this (truncated) excerpt.
    ptrans = trans_info.get_trans_name('ACCParallelTrans')
    dtrans = trans_info.get_trans_name('ACCDataTrans')
    ltrans = trans_info.get_trans_name('ACCLoopTrans')

    # invoke0
    # fuse all outer loops
    # Each fusion merges the first two children of the current schedule;
    # old-style API: apply() returns (new_schedule, memento).
    lf1_schedule, _ = fuse_trans.apply(schedule.children[0],
                                       schedule.children[1])
    lf2_schedule, _ = fuse_trans.apply(lf1_schedule.children[0],
                                       lf1_schedule.children[1])
    lf3_schedule, _ = fuse_trans.apply(lf2_schedule.children[0],
                                       lf2_schedule.children[1])
    lf3_schedule.view()

    # fuse all inner loops
# Esempio n. 12 (Example 12)
# 0
def trans(psy):
    ''' Transform the schedule for OpenCL generation '''
    # NOTE(review): this script relies on several module-level names that
    # are defined elsewhere (not visible here): MOVE_BOUNDARIES,
    # FUCTIONAL_PARALLELISM, WORK_GROUP_SIZE, XILINX_CONFIG_FILE, plus
    # GOOpenCLTrans, GOMoveIterationBoundariesInsideKernelTrans, Config
    # and os. 'FUCTIONAL_PARALLELISM' looks like a typo for
    # FUNCTIONAL_PARALLELISM, but it must match the flag's actual
    # definition - confirm before renaming.

    # Import transformations
    tinfo = TransInfo()
    globaltrans = tinfo.get_trans_name('KernelImportsToArguments')
    move_boundaries_trans = GOMoveIterationBoundariesInsideKernelTrans()
    cltrans = GOOpenCLTrans()

    # Get the invoke routine
    schedule = psy.invokes.get('invoke_0').schedule

    # Map the kernels by their name to different OpenCL queues. The multiple
    # command queues can be executed concurrently while each command queue
    # executes in-order its kernels. This provides functional parallelism
    # when kernels don't have dependencies between them.
    qmap = {
        'continuity_code': 1,
        'momentum_u_code': 2,
        'momentum_v_code': 3,
        'bc_ssh_code': 1,
        'bc_solid_u_code': 2,
        'bc_solid_v_code': 3,
        'bc_flather_u_code': 2,
        'bc_flather_v_code': 3,
        'field_copy_code': 1,
        'next_sshu_code': 1,
        'next_sshv_code': 1
    }

    # Remove global variables from inside each kernel, pass the boundary
    # values as arguments to the kernel and set the OpenCL work size to 64,
    # which is required for performance (with OpenCL < 1.2 this requires
    # the resulting application to be executed with DL_ESM_ALIGNMENT=64).
    # Technically the OpenCL global_size (which is controlled by
    # DL_ESM_ALIGNMENT) must be divisible by the work_group_size (which
    # is set to 64 in the psyclone script) in OpenCL implementations < 2.0.
    # But from OpenCL 2.0 the standard says its not necessary anymore.
    # In practice it is safe to always use it as most implementations
    # are lacking in this aspect.
    # If using a different WORK_GROUP_SIZE, make sure to update the
    # DL_ESM_ALIGNMENT to match.
    for kern in schedule.kernels():
        print(kern.name)
        globaltrans.apply(kern)
        if MOVE_BOUNDARIES:
            move_boundaries_trans.apply(kern)
        if FUCTIONAL_PARALLELISM:
            # Assign each kernel to its command queue from the map above.
            kern.set_opencl_options({
                'local_size': WORK_GROUP_SIZE,
                'queue_number': qmap[kern.name]
            })
        else:
            kern.set_opencl_options({'local_size': WORK_GROUP_SIZE})

    # Transform invoke to OpenCL
    cltrans.apply(schedule)

    if XILINX_CONFIG_FILE:
        # Create a Xilinx Compiler Configuration file
        # NOTE(review): only the header line is currently written; the
        # remaining settings are kept as commented-out templates.
        path = Config.get().kernel_output_dir
        with open(os.path.join(path, "xilinx.cfg"), "w") as cfgfile:
            cfgfile.write("# Xilinx FPGA configuration file\n")
            # cfgfile.write("[connectivity]\n")
            # cfgfile.write("# Create 2 CU of the given kernels\n")
            # cfgfile.write("nk=continuity_code:2\n")
            # cfgfile.write("nk=momentum_u_code:2\n")
            # cfgfile.write("nk=momentum_v_code:2\n")
            # cfgfile.write("nk=bc_ssh_code:2\n")

            # cfgfile.write("\n[hls]\n")
            # cfgfile.write("# Assign CUs to different SLRs\n")
            # cfgfile.write("slr=momentum_u_code_1:SLR0\n")
            # cfgfile.write("slr=momentum_u_code_2:SLR0\n")
            # cfgfile.write("slr=momentum_v_code_1:SLR2\n")
            # cfgfile.write("slr=momentum_v_code_2:SLR2\n")

    return psy
# Esempio n. 13 (Example 13)
# 0
# Example: apply OMPParallelLoopTrans to two invokes of a PSy object.
# NOTE(review): 'psy' and 'TransInfo' are created/imported in an earlier
# part of this example that is not visible here.
print(psy.gen)

# List the various invokes that the PSy layer contains
print(psy.invokes.names)

# Get the loop schedule associated with one of these
# invokes
schedule = psy.invokes.get('invoke_0_v3_kernel_type').schedule
schedule.view()

# Get the list of possible loop transformations
t = TransInfo()
print(t.list)

# Create an OpenMPLoop-transformation object
ol = t.get_trans_name('OMPParallelLoopTrans')

# Apply it to the loop schedule of the selected invoke
ol.apply(schedule.children[0])
schedule.view()

# Generate the Fortran code for the new PSy layer
print(psy.gen)

# Repeat for the solver invoke: fetch its schedule, parallelise the
# first loop and regenerate the Fortran.
schedule = psy.invokes.get('invoke_1_v3_solver_kernel_type').schedule
schedule.view()

ol.apply(schedule.children[0])
schedule.view()

print(psy.gen)
# Esempio n. 14 (Example 14)
# 0
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Author R. Ford STFC Daresbury Lab

# Example: fuse the first two loops of invoke_0 of a dynamo0.1 algorithm.
from __future__ import print_function
from psyclone.parse.algorithm import parse
from psyclone.psyGen import PSyFactory
from psyclone.psyGen import TransInfo
api = "dynamo0.1"
# parse() returns the fparser AST and the invoke information.
ast, invokeInfo = parse("dynamo_algorithm_mod.F90", api=api)
psy = PSyFactory(api).create(invokeInfo)
print(psy.gen)

print(psy.invokes.names)

schedule = psy.invokes.get('invoke_0').schedule
schedule.view()

t = TransInfo()
print(t.list)

lf = t.get_trans_name('LoopFuse')

schedule.view()
# Old-style API: apply() returns (new_schedule, memento).
new_schedule, memento = lf.apply(schedule.children[0], schedule.children[1])
new_schedule.view()

# Store the fused schedule and emit the final Fortran.
psy.invokes.get('invoke_0').schedule = new_schedule
print(psy.gen)
# Esempio n. 15 (Example 15)
# 0
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Authors: R. Ford and A. R. Porter, STFC Daresbury Lab

# Example: fuse the first two loops of invoke_0 of a dynamo0.1 algorithm
# (identical to the previous example but uses the 'LoopFuseTrans' name).
from __future__ import print_function
from psyclone.parse.algorithm import parse
from psyclone.psyGen import PSyFactory
from psyclone.psyGen import TransInfo
api = "dynamo0.1"
# parse() returns the fparser AST and the invoke information.
ast, invokeInfo = parse("dynamo_algorithm_mod.F90", api=api)
psy = PSyFactory(api).create(invokeInfo)
print(psy.gen)

print(psy.invokes.names)

schedule = psy.invokes.get('invoke_0').schedule
schedule.view()

t = TransInfo()
print(t.list)

lf = t.get_trans_name('LoopFuseTrans')

schedule.view()
# Old-style API: apply() returns (new_schedule, memento).
new_schedule, memento = lf.apply(schedule.children[0], schedule.children[1])
new_schedule.view()

# Store the fused schedule and emit the final Fortran.
psy.invokes.get('invoke_0').schedule = new_schedule
print(psy.gen)
# Esempio n. 16 (Example 16)
# 0
if __name__ == "__main__":
    # Example: make implicit NEMO loops explicit, then apply OpenMP to
    # the "levels" loops of tra_ldf_iso.
    # NOTE(review): parse, PSyFactory and TransInfo come from imports in
    # an earlier part of this example that is not visible here.
    from psyclone.nemo import NemoKern, NemoImplicitLoop
    API = "nemo"
    # parse() returns (ast, invoke_info); the AST is unused here.
    _, INVOKEINFO = parse("traldf_iso.F90", api=API)
    PSY = PSyFactory(API).create(INVOKEINFO)
    print(PSY.gen)

    print("Invokes found:")
    print(PSY.invokes.names)

    SCHED = PSY.invokes.get('tra_ldf_iso').schedule
    SCHED.view()

    TRANS_INFO = TransInfo()
    print(TRANS_INFO.list)
    OMP_TRANS = TRANS_INFO.get_trans_name('OMPParallelLoopTrans')
    DO_TRANS = TRANS_INFO.get_trans_name('NemoExplicitLoopTrans')
    # Transform each implicit loop to make the outermost loop explicit
    for loop in SCHED.loops():
        if isinstance(loop, NemoImplicitLoop):
            _, _ = DO_TRANS.apply(loop)
    # Parallelise every "levels" loop that actually contains a kernel.
    for loop in SCHED.loops():
        # TODO loop.kernel method needs extending to cope with
        # multiple kernels
        # NOTE(review): old walk() signature taking (children, node_type).
        kernels = loop.walk(loop.children, NemoKern)
        if kernels and loop.loop_type == "levels":
            # NOTE(review): the result is bound to lowercase 'sched' and
            # never used; the uppercase SCHED is viewed below - confirm
            # the transformation modifies the schedule in place.
            sched, _ = OMP_TRANS.apply(loop)

    SCHED.view()

    # Store the (possibly transformed) schedule back on the PSy object.
    PSY.invokes.get('tra_ldf_iso').schedule = SCHED