Example 1
def trans(psy):
    ''' PSyclone transformation script for the dynamo0p3 API to apply
    loop fusion and OpenMP for a particular example.'''
    # Imports assumed for a standalone script (the transformation
    # classes below come from psyclone.transformations in the PSyclone
    # version these examples target):
    from psyclone.transformations import (OMPParallelTrans,
                                          Dynamo0p3OMPLoopTrans,
                                          DynamoLoopFuseTrans)

    otrans = OMPParallelTrans()
    ltrans = Dynamo0p3OMPLoopTrans()
    ftrans = DynamoLoopFuseTrans()

    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule

    # loop fuse the two builtin kernels
    schedule, _ = ftrans.apply(schedule.children[0],
                               schedule.children[1],
                               same_space=True)

    # Add an OpenMP do directive to the resultant loop-fused loop,
    # specifying that we want reproducible reductions
    schedule, _ = ltrans.apply(schedule.children[0], reprod=True)

    # Add an OpenMP parallel directive around the OpenMP do directive
    schedule, _ = otrans.apply(schedule.children[0])

    # take a look at what we've done
    schedule.view()
    schedule.dag()

    return psy
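A transformation script like this is not run directly: PSyclone imports the
script and calls trans(psy) on the PSy object it has built from the algorithm
file. A minimal sketch of driving it programmatically (hedged: it assumes the
psyclone.generator.generate entry point; "alg.f90" and "./script.py" are
purely illustrative names):

from psyclone.generator import generate

# generate() returns the re-written algorithm code and the generated
# PSy-layer code as a 2-tuple.
alg_code, psy_code = generate("alg.f90", api="dynamo0.3",
                              script_name="./script.py")
print(psy_code)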
Example 2
def test_gocean_omp_parallel():
    '''Test that an OMP PARALLEL directive in a 'classical' API (gocean here)
    is created correctly.
    '''

    from psyclone.transformations import OMPParallelTrans

    _, invoke = get_invoke("single_invoke.f90", "gocean1.0", idx=0)

    omp = OMPParallelTrans()
    omp_sched, _ = omp.apply(invoke.schedule[0])

    # Now remove the GOKern (since it's not yet supported in the
    # visitor pattern) and replace it with a simple assignment
    # TODO: #440 tracks this
    replace_child_with_assignment(omp_sched[0])

    # omp_sched is a GOInvokeSchedule, which is not yet supported.
    # So only convert starting from the OMPParallelDirective
    fvisitor = FortranWriter()
    result = fvisitor(omp_sched[0])
    correct = '''!$omp parallel
  a=b
!$omp end parallel'''
    assert correct in result

    cvisitor = CWriter()
    result = cvisitor(omp_sched[0])
    correct = '''#pragma omp parallel
{
  a = b;
}'''
    assert correct in result
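The helper replace_child_with_assignment() is defined elsewhere in the test
module. A plausible sketch of it in terms of PSyIR node constructors (hedged:
the symbol names, the REAL_TYPE datatype and the use of replace_with() are
assumptions; the original helper may differ):

from psyclone.psyir.nodes import Assignment, Reference
from psyclone.psyir.symbols import DataSymbol, REAL_TYPE

def replace_child_with_assignment(node):
    # Swap the first child of 'node' (here the unsupported GOKern) for
    # a simple 'a = b' assignment that the backend visitors can handle.
    assignment = Assignment.create(
        Reference(DataSymbol("a", REAL_TYPE)),
        Reference(DataSymbol("b", REAL_TYPE)))
    node.children[0].replace_with(assignment)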
Example 3
def test_omp_region_no_slice_no_const_bounds():
    ''' Test that we generate the correct code when we apply an OpenMP
    PARALLEL region transformation to a list of nodes when the Schedule
    has been transformed to use loop-bound look-ups '''

    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule
    ompr = OMPParallelTrans()
    cbtrans = GOConstLoopBoundsTrans()

    newsched, _ = cbtrans.apply(schedule, const_bounds=False)
    omp_schedule, _ = ompr.apply(newsched.children)
    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule
    # Store the results of applying this code transformation as
    # a string
    gen = str(psy.gen)
    gen = gen.lower()
    # Iterate over the lines of generated code
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1
    assert call_count == 3
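The region-scanning loop above recurs verbatim in several of the examples
below (e.g. Examples 6 and 17), so it can be factored into a small helper.
A sketch built only from the code above (pure string processing, no PSyclone
API involved):

def count_calls_in_omp_region(gen):
    # Count CALL statements that lie between '!$omp parallel' and the
    # matching '!$omp end parallel' in the generated Fortran.
    within_omp_region = False
    call_count = 0
    for line in gen.lower().split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1
    return call_count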
Example 4
def test_omp_region_retains_kernel_order2():
    ''' Test that applying the OpenMP PARALLEL region transformation
    to a sub-set of nodes (first 2 of 3) does not change their
    ordering '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompr = OMPParallelTrans()

    omp_schedule, _ = ompr.apply(schedule.children[0:2])

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule
    # Store the results of applying this code transformation as
    # a string
    gen = str(psy.gen)
    gen = gen.lower()

    # Iterate over the lines of generated code
    cu_idx = -1
    cv_idx = -1
    ts_idx = -1
    for idx, line in enumerate(gen.split('\n')):
        if 'call compute_cu' in line:
            cu_idx = idx
        if 'call compute_cv' in line:
            cv_idx = idx
        if 'call time_smooth' in line:
            ts_idx = idx

    # Kernels should be in order {compute_cu, compute_cv, time_smooth}
    assert cu_idx < cv_idx < ts_idx
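The index-scanning idiom can likewise be wrapped in a helper. A sketch (again
plain string processing; the helper name is an invention for illustration):

def first_line_index(gen, fragment):
    # Return the index of the first line of 'gen' that contains
    # 'fragment', or -1 if it never occurs.
    for idx, line in enumerate(gen.split('\n')):
        if fragment in line:
            return idx
    return -1

# The ordering assertion above then reads:
#   assert (first_line_index(gen, 'call compute_cu')
#           < first_line_index(gen, 'call compute_cv')
#           < first_line_index(gen, 'call time_smooth'))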
Example 5
def test_openmp_region():
    ''' Test the application of an OpenMP parallel region transformation
    to a single loop '''
    psy, invoke = get_invoke("algorithm/1_single_function.f90",
                             TEST_API,
                             name="invoke_0_testkern_type")
    schedule = invoke.schedule
    rtrans = OMPParallelTrans()
    rtrans.apply(schedule.children[0])
    gen = str(psy.gen)

    # Check that our list of private variables is correct
    assert "!$omp parallel default(shared), private(cell,map)" in gen

    for idx, line in enumerate(gen.split('\n')):
        if "!$omp parallel default(shared)" in line:
            startpara_idx = idx
        if "DO cell=1,f1%get_ncell()" in line:
            do_idx = idx
        if "CALL f1%vspace%get_cell_dofmap(cell, map)" in line:
            dmap_idx = idx
        if "CALL testkern_code(nlayers, ndf, map, f1%data, "\
           "f2%data, m1%data)" in line:
            kcall_idx = idx
        if "END DO" in line:
            enddo_idx = idx
        if "!$omp end parallel" in line:
            endpara_idx = idx
    assert do_idx == startpara_idx + 1
    assert dmap_idx == do_idx + 1
    assert kcall_idx == dmap_idx + 1
    assert enddo_idx == kcall_idx + 1
    assert endpara_idx == enddo_idx + 1
Example 6
def test_omp_region_no_slice():
    ''' Test that we can pass the OpenMP PARALLEL region transformation
    a list of nodes specified as node.children '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule
    ompr = OMPParallelTrans()

    omp_schedule, _ = ompr.apply(schedule.children)
    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule
    # Store the results of applying this code transformation as
    # a string
    gen = str(psy.gen)
    gen = gen.lower()
    # Iterate over the lines of generated code
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1
    assert call_count == 3
Example 7
def trans(psy):
    '''
    Applies PSyclone colouring and OpenMP transformations.
    '''
    # Imports assumed for a standalone script (DISCONTINUOUS_FUNCTION_SPACES
    # is taken to come from psyclone.dynamo0p3 in the PSyclone version
    # these examples target):
    from psyclone.transformations import (Dynamo0p3ColourTrans,
                                          Dynamo0p3OMPLoopTrans,
                                          OMPParallelTrans)
    from psyclone.dynamo0p3 import DISCONTINUOUS_FUNCTION_SPACES

    ctrans = Dynamo0p3ColourTrans()
    otrans = Dynamo0p3OMPLoopTrans()
    oregtrans = OMPParallelTrans()

    # Loop over all of the Invokes in the PSy object
    for invoke in psy.invokes.invoke_list:

        print("Transforming invoke '{0}' ...".format(invoke.name))
        schedule = invoke.schedule

        # Colour loops over cells unless they are on discontinuous
        # spaces (W3, WTHETA and W2V) or over dofs
        for loop in schedule.loops():
            if loop.iteration_space == "cells" \
                and loop.field_space.orig_name \
                    not in DISCONTINUOUS_FUNCTION_SPACES:
                schedule, _ = ctrans.apply(loop)

        # Add OpenMP to every loop except the outer loops over colours.
        for loop in schedule.loops():
            if loop.loop_type != "colours":
                schedule, _ = oregtrans.apply(loop)
                schedule, _ = otrans.apply(loop, reprod=True)

        schedule.view()

    return psy
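After Dynamo0p3ColourTrans has been applied, each coloured kernel loop becomes
a loop over colours containing a loop over the cells of one colour, which is
what the loop_type check above relies on. A quick way to inspect the result
(sketch; loop_type values as used by the dynamo0p3 API):

def show_loop_types(schedule):
    # Coloured kernel loops report loop_type "colours" (outer) and
    # "colour" (inner); built-in loops over dofs report "dofs".
    for loop in schedule.loops():
        print(loop.loop_type)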
Example 8
def test_omp_transform():
    '''Tests that the profiling transform works correctly with OMP
     parallelisation.'''

    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    # This test expects constant loop bounds
    schedule._const_loop_bounds = True

    prt = ProfileTrans()
    omp_loop = GOceanOMPLoopTrans()
    omp_par = OMPParallelTrans()

    # Parallelise the first loop:
    omp_loop.apply(schedule[0])
    omp_par.apply(schedule[0])
    prt.apply(schedule[0])

    correct = (
        "      CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", "
        "\"invoke_loop1:bc_ssh_code:r0\", 0, 0)\n"
        "      !$omp parallel default(shared), private(i,j)\n"
        "      !$omp do schedule(static)\n"
        "      DO j=2,jstop\n"
        "        DO i=2,istop\n"
        "          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)\n"
        "        END DO\n"
        "      END DO\n"
        "      !$omp end do\n"
        "      !$omp end parallel\n"
        "      CALL profile_psy_data%PostEnd")
    code = str(invoke.gen())
    assert correct in code

    # Now add another profile node between the omp parallel and omp do
    # directives:
    prt.apply(schedule[0].profile_body[0].dir_body[0])

    code = str(invoke.gen())

    correct = \
        "CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", " + \
        '''"invoke_loop1:bc_ssh_code:r0", 0, 0)
      !$omp parallel default(shared), private(i,j)
      CALL profile_psy_data_1%PreStart("psy_test27_loop_swap", ''' + \
        '''"invoke_loop1:bc_ssh_code:r1", 0, 0)
      !$omp do schedule(static)
      DO j=2,jstop
        DO i=2,istop
          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)
        END DO
      END DO
      !$omp end do
      CALL profile_psy_data_1%PostEnd
      !$omp end parallel
      CALL profile_psy_data%PostEnd'''

    assert correct in code
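The region names in the expected code ("psy_test27_loop_swap",
"invoke_loop1:bc_ssh_code:r0") are derived automatically by ProfileTrans.
They can also be given explicitly through the region_name option (hedged:
option name as in recent PSyclone releases; "my_region" is illustrative):

# The tuple supplies the module and region parts of the name.
prt.apply(schedule[0], {"region_name": ("psy_test27_loop_swap",
                                        "my_region")})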
Example 9
def test_omp_transform():
    '''Tests that the profiling transform works correctly with OMP
     parallelisation.'''

    _, invoke = get_invoke("test27_loop_swap.f90",
                           "gocean1.0",
                           name="invoke_loop1")
    schedule = invoke.schedule

    prt = ProfileRegionTrans()
    omp_loop = GOceanOMPLoopTrans()
    omp_par = OMPParallelTrans()

    # Parallelise the first loop:
    sched1, _ = omp_loop.apply(schedule.children[0])
    sched2, _ = omp_par.apply(sched1.children[0])
    sched3, _ = prt.apply(sched2.children[0])

    correct = (
        "      CALL ProfileStart(\"boundary_conditions_ne_offset_mod\", "
        "\"bc_ssh_code\", profile)\n"
        "      !$omp parallel default(shared), private(i,j)\n"
        "      !$omp do schedule(static)\n"
        "      DO j=2,jstop\n"
        "        DO i=2,istop\n"
        "          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)\n"
        "        END DO \n"
        "      END DO \n"
        "      !$omp end do\n"
        "      !$omp end parallel\n"
        "      CALL ProfileEnd(profile)")
    code = str(invoke.gen())
    assert correct in code

    # Now add another profile node between the omp parallel and omp do
    # directives:
    sched3, _ = prt.apply(sched3.children[0].children[0].children[0])

    code = str(invoke.gen())

    correct = '''      CALL ProfileStart("boundary_conditions_ne_offset_mod", \
"bc_ssh_code", profile)
      !$omp parallel default(shared), private(i,j)
      CALL ProfileStart("boundary_conditions_ne_offset_mod", "bc_ssh_code_1", \
profile_1)
      !$omp do schedule(static)
      DO j=2,jstop
        DO i=2,istop
          CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)
        END DO\x20
      END DO\x20
      !$omp end do
      CALL ProfileEnd(profile_1)
      !$omp end parallel
      CALL ProfileEnd(profile)'''
    assert correct in code
Example 10
def test_omp_region_with_wrong_arg_type():
    ''' Test that the OpenMP PARALLEL region transformation
        raises an appropriate error if passed something that is not
        a list of Nodes or a single Node. '''
    _, invoke = get_invoke("single_invoke_three_kernels.f90", 0)

    ompr = OMPParallelTrans()

    with pytest.raises(TransformationError):
        _, _ = ompr.apply(invoke)
Example 11
def test_omp_add_region_invalid_data_move():
    ''' Check that _add_region() raises the expected error if an invalid
    value for data_movement is supplied. '''
    otrans = OMPParallelTrans()
    _, invoke_info = get_invoke("explicit_do.f90", api=API, idx=0)
    schedule = invoke_info.schedule
    otrans.apply([schedule[0]])
    ompdir = schedule[0]
    with pytest.raises(InternalError) as err:
        ompdir._add_region("DATA", "END DATA", data_movement="analyse")
    assert ("the data_movement='analyse' option is only valid for an "
            "OpenACC directive" in str(err.value))
Example 12
def test_node_list_ompparallel_gocean1p0():
    ''' Test that applying Extract Transformation on a list of Nodes
    enclosed within an OMP Parallel Region produces the correct result
    in GOcean1.0 API. '''
    from psyclone.transformations import GOceanOMPLoopTrans, OMPParallelTrans

    etrans = GOceanExtractRegionTrans()
    ltrans = GOceanOMPLoopTrans()
    otrans = OMPParallelTrans()

    # Test a Loop nested within the OMP Parallel DO Directive
    _, invoke_info = parse(os.path.join(GOCEAN_BASE_PATH,
                                        "single_invoke_three_kernels.f90"),
                           api=GOCEAN_API)
    psy = PSyFactory(GOCEAN_API, distributed_memory=False).create(invoke_info)
    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule

    # Apply GOceanOMPLoopTrans to the first two Loops
    schedule, _ = ltrans.apply(schedule.children[0])
    schedule, _ = ltrans.apply(schedule.children[1])
    # and enclose them within a parallel region
    schedule, _ = otrans.apply(schedule.children[0:2])
    # Now enclose the parallel region within an ExtractNode (inserted
    # at the previous location of the OMPParallelDirective)
    schedule, _ = etrans.apply(schedule.children[0])

    code = str(psy.gen)
    output = ("      ! ExtractStart\n"
              "      ! CALL write_extract_arguments(argument_list)\n"
              "      !\n"
              "      !$omp parallel default(shared), private(i,j)\n"
              "      !$omp do schedule(static)\n"
              "      DO j=2,jstop\n"
              "        DO i=2,istop+1\n"
              "          CALL compute_cu_code(i, j, cu_fld%data, "
              "p_fld%data, u_fld%data)\n"
              "        END DO \n"
              "      END DO \n"
              "      !$omp end do\n"
              "      !$omp do schedule(static)\n"
              "      DO j=2,jstop+1\n"
              "        DO i=2,istop\n"
              "          CALL compute_cv_code(i, j, cv_fld%data, "
              "p_fld%data, v_fld%data)\n"
              "        END DO \n"
              "      END DO \n"
              "      !$omp end do\n"
              "      !$omp end parallel\n"
              "      !\n"
              "      ! ExtractEnd\n")
    assert output in code
Example 13
def test_nemo_omp_parallel():
    '''Tests if an OpenMP parallel directive in NEMO is handled correctly.
    '''
    # Generate fparser2 parse tree from Fortran code.
    code = '''
        module test
        contains
        subroutine tmp()
          integer :: i, a
          integer, dimension(:) :: b
          do i = 1, 20, 2
            a = 2 * i
            b(i) = b(i) + a
          enddo
        end subroutine tmp
        end module test'''
    schedule = create_schedule(code, "tmp")
    from psyclone.transformations import OMPParallelTrans

    # Now apply a parallel transform
    omp_par = OMPParallelTrans()
    # Note that the loop is not recognised as a NEMO kernel, so the
    # OMP node-type check would find the assignment statement and
    # prevent application of OMP PARALLEL to the loop. We therefore
    # disable the node-type check so that OMP PARALLEL can be applied.
    omp_par.apply(schedule[0], {"node-type-check": False})

    fvisitor = FortranWriter()
    result = fvisitor(schedule)
    correct = '''!$omp parallel private(a,i)
    do i = 1, 20, 2
      a=2 * i
      b(i)=b(i) + a
    enddo
!$omp end parallel'''
    assert correct in result

    cvisitor = CWriter()
    result = cvisitor(schedule[0])
    correct = '''#pragma omp parallel private(a,i)
{
  for(i=1; i<=20; i+=2)
  {
    a = (2 * i);
    b[i] = (b[i] + a);
  }
}'''
    assert correct in result
Example 14
def test_parallelregion_refuse_codeblock():
    ''' Check that ParallelRegionTrans.validate() rejects a loop nest that
    encloses a CodeBlock. We use OMPParallelTrans as ParallelRegionTrans
    is abstract. '''
    otrans = OMPParallelTrans()
    # Construct a valid Loop in the PSyIR with a CodeBlock in its body
    parent = Loop.create(DataSymbol("ji", INTEGER_TYPE),
                         Literal("1", INTEGER_TYPE),
                         Literal("10", INTEGER_TYPE),
                         Literal("1", INTEGER_TYPE),
                         [CodeBlock([], CodeBlock.Structure.STATEMENT, None)])
    with pytest.raises(TransformationError) as err:
        otrans.validate([parent])
    assert ("Nodes of type 'CodeBlock' cannot be enclosed by a "
            "OMPParallelTrans transformation" in str(err.value))
Example 15
def test_omp_region_nodes_not_children_of_same_schedule():
    ''' Test that we raise appropriate error if user attempts
    to put a region around nodes that are not children of
    the same schedule '''
    _, invoke1 = get_invoke("test12_two_invokes_two_kernels.f90", 0)
    schedule1 = invoke1.schedule
    _, invoke2 = get_invoke("test12_two_invokes_two_kernels.f90", 1)
    schedule2 = invoke2.schedule

    ompr = OMPParallelTrans()

    # Attempt to put an OpenMP parallel region the loops from the
    # two different schedules
    with pytest.raises(TransformationError):
        _, _ = ompr.apply([schedule1.children[0], schedule2.children[0]])
Example 16
def test_omp_parallel_region_inside_parallel_do():
    ''' Test that a generation error is raised if we attempt
    to have an OpenMP parallel region within an OpenMP
    parallel do (with the latter applied first) '''
    _, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompl = GOceanOMPParallelLoopTrans()
    ompr = OMPParallelTrans()

    # Put an OpenMP parallel do directive around one of the loops
    _, _ = ompl.apply(schedule.children[1])

    # Now attempt to put a parallel region inside that parallel do
    with pytest.raises(TransformationError):
        _, _ = ompr.apply([schedule.children[1].children[0]])
Example 17
def test_omp_region_with_single_loop():
    ''' Test that we can pass the OpenMP PARALLEL region transformation
        a single node in a schedule '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompr = OMPParallelTrans()
    cbtrans = GOConstLoopBoundsTrans()

    omp_schedule, _ = ompr.apply(schedule.children[1])

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule
    # Store the results of applying this code transformation as
    # a string
    gen = str(psy.gen)
    gen = gen.lower()

    # Iterate over the lines of generated code
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1

    assert call_count == 1

    # Repeat the test after turning off constant loop bounds
    newsched, _ = cbtrans.apply(omp_schedule, const_bounds=False)
    invoke.schedule = newsched
    gen = str(psy.gen)
    gen = gen.lower()
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1

    assert call_count == 1
Example 18
def test_omp_do_update():
    '''Check the OMPDoDirective update function.'''
    from psyclone.transformations import OMPLoopTrans, OMPParallelTrans
    from psyclone.psyGen import OMPDoDirective
    _, invoke_info = parse(os.path.join(BASE_PATH, "imperfect_nest.f90"),
                           api=API,
                           line_length=False)
    psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
    schedule = psy.invokes.get('imperfect_nest').schedule
    par_trans = OMPParallelTrans()
    loop_trans = OMPLoopTrans()
    new_sched, _ = par_trans.apply(
        schedule[0].loop_body[1].else_body[0].else_body[0])
    new_sched, _ = loop_trans.apply(
        new_sched[0].loop_body[1].else_body[0].else_body[0].children[0])
    gen_code = str(psy.gen).lower()
    correct = '''      !$omp parallel default(shared), private(ji,jj)
      !$omp do schedule(static)
      do jj = 1, jpj, 1
        do ji = 1, jpi, 1
          zdkt(ji, jj) = (ptb(ji, jj, jk - 1, jn) - ptb(ji, jj, jk, jn)) * \
wmask(ji, jj, jk)
        end do
      end do
      !$omp end do
      !$omp end parallel'''
    assert correct in gen_code
    directive = new_sched[0].loop_body[1].else_body[0].else_body[0]\
        .children[0]
    assert isinstance(directive, OMPDoDirective)

    # Call update a second time and make sure that this does not
    # trigger the whole update process again, and we get the same ast
    old_ast = directive.ast
    directive.update()
    assert directive.ast is old_ast

    # Remove the existing AST, so we can do more tests:
    directive.ast = None
    # Make the schedule invalid by adding a second child to the
    # OMPParallelDoDirective
    directive.children.append(new_sched[0].loop_body[3])

    with pytest.raises(GenerationError) as err:
        _ = directive.update()
    assert ("An OpenMP DO can only be applied to a single loop but "
            "this Node has 2 children:" in str(err))
Example 19
def test_omp_parallel():
    ''' Check insertion of an OpenMP parallel region containing a single,
    explicit loop. '''
    otrans = OMPParallelTrans()
    psy, invoke_info = get_invoke("explicit_do.f90", api=API, idx=0)
    schedule = invoke_info.schedule
    otrans.apply([schedule[0]])
    gen_code = str(psy.gen).lower()
    assert ("  !$omp parallel default(shared), private(ji,jj,jk)\n"
            "  do jk = 1, jpk\n"
            "    do jj = 1, jpj\n"
            "      do ji = 1, jpi\n"
            "        umask(ji, jj, jk) = ji * jj * jk / r\n"
            "      end do\n"
            "    end do\n"
            "  end do\n"
            "  !$omp end parallel\n" in gen_code)
Example 20
def test_nemo_omp_parallel():
    '''Tests if an OpenMP parallel directive in NEMO is handled correctly.
    '''
    # Generate fparser2 parse tree from Fortran code.
    code = '''
        module test
        contains
        subroutine tmp()
          integer :: i, a
          integer, dimension(:) :: b
          do i = 1, 20, 2
            a = 2 * i
            b(i) = b(i) + a
          enddo
        end subroutine tmp
        end module test'''
    schedule = create_schedule(code, "tmp")
    from psyclone.transformations import OMPParallelTrans

    # Now apply a parallel transform
    omp_par = OMPParallelTrans()
    omp_par.apply(schedule[0])

    fvisitor = FortranWriter()
    result = fvisitor(schedule)
    correct = '''!$omp parallel private(a,i)
    do i = 1, 20, 2
      a=2 * i
      b(i)=b(i) + a
    enddo
!$omp end parallel'''
    assert correct in result

    cvisitor = CWriter()
    result = cvisitor(schedule[0])
    correct = '''#pragma omp parallel private(a,i)
{
  for(i=1; i<=20; i+=2)
  {
    a = (2 * i);
    b[i] = (b[i] + a);
  }
}'''
    assert correct in result
Example 21
def test_omp_parallel_errs():
    ''' Check that we raise the expected errors when incorrectly attempting
    to add an OpenMP parallel region containing more than one node. '''
    otrans = OMPParallelTrans()
    psy, invoke_info = get_invoke("imperfect_nest.f90", api=API, idx=0)
    schedule = invoke_info.schedule

    # Apply the OMP Parallel transformation so as to enclose the last two
    # loop nests (Python's slice notation is such that the expression below
    # gives elements 2-3).
    otrans.apply(schedule[0].loop_body[2:4])
    directive = schedule[0].loop_body[2]
    # Break the AST by deleting some of it
    schedule[0].ast.content.remove(directive.children[0].ast)
    with pytest.raises(InternalError) as err:
        _ = psy.gen
    assert ("Failed to find locations to insert begin/end directives"
            in str(err.value))
Example 22
def test_omp_region_nodes_not_children_of_same_parent():
    ''' Test that we raise appropriate error if user attempts
    to put a region around nodes that are not children of
    the same parent '''
    _, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompl = GOceanOMPParallelLoopTrans()
    ompr = OMPParallelTrans()

    # Put an OpenMP parallel do around the first loop in the schedule
    _, _ = ompl.apply(schedule.children[0])

    # Attempt to put an OpenMP parallel region around that same loop
    # (which is now a child of an OpenMP loop directive) and the
    # second loop in the schedule
    with pytest.raises(TransformationError):
        _, _ = ompr.apply(
            [schedule.children[0].children[0], schedule.children[1]])
Example 23
def test_omp_do_update():
    '''Check the OMPDoDirective update function.'''
    psy, invoke = get_invoke("imperfect_nest.f90", api=API, idx=0)
    schedule = invoke.schedule
    par_trans = OMPParallelTrans()
    loop_trans = OMPLoopTrans()
    new_sched, _ = par_trans.apply(schedule[0].loop_body[1]
                                   .else_body[0].else_body[0])
    new_sched, _ = loop_trans.apply(new_sched[0].loop_body[1]
                                    .else_body[0].else_body[0].dir_body[0])
    gen_code = str(psy.gen).lower()
    correct = '''      !$omp parallel default(shared), private(ji,jj)
      !$omp do schedule(static)
      do jj = 1, jpj, 1
        do ji = 1, jpi, 1
          zdkt(ji, jj) = (ptb(ji, jj, jk - 1, jn) - ptb(ji, jj, jk, jn)) * \
wmask(ji, jj, jk)
        end do
      end do
      !$omp end do
      !$omp end parallel'''
    assert correct in gen_code
    directive = new_sched[0].loop_body[1].else_body[0].else_body[0]\
        .dir_body[0]
    assert isinstance(directive, OMPDoDirective)

    # Call update a second time and make sure that this does not
    # trigger the whole update process again, and we get the same ast
    old_ast = directive.ast
    directive.update()
    assert directive.ast is old_ast

    # Remove the existing AST, so we can do more tests:
    directive.ast = None
    # Make the schedule invalid by adding a second child to the
    # OMPParallelDoDirective
    directive.dir_body.children.append(new_sched[0].loop_body[3])

    with pytest.raises(GenerationError) as err:
        _ = directive.update()
    assert ("An OpenMP DO can only be applied to a single loop but "
            "this Node has 2 children:" in str(err.value))
Example 24
def trans(psy):
    '''PSyclone transformation script for the dynamo0p3 API that applies
    loop colouring and OpenMP parallel loop parallelisation. It also
    outputs a textual representation of the transformed PSyIR.

    :param psy: a PSyclone PSy object which captures the algorithm and \
        kernel information required by PSyclone.
    :type psy: subclass of :py:class:`psyclone.psyGen.PSy`

    '''
    # Imports assumed for a standalone script (a PSyclone version
    # providing the psyclone.domain.lfric packages):
    from psyclone.domain.lfric import LFRicConstants
    from psyclone.psyir.nodes import Loop
    from psyclone.transformations import (Dynamo0p3ColourTrans,
                                          Dynamo0p3OMPLoopTrans,
                                          DynamoOMPParallelLoopTrans,
                                          OMPParallelTrans,
                                          TransformationError)

    otrans = DynamoOMPParallelLoopTrans()
    ctrans = Dynamo0p3ColourTrans()
    ptrans = OMPParallelTrans()
    ltrans = Dynamo0p3OMPLoopTrans()
    const = LFRicConstants()

    for invoke in psy.invokes.invoke_list:
        schedule = invoke.schedule

        # Colour any loops that need colouring
        for loop in schedule.walk(Loop):
            if (loop.field_space.orig_name
                    not in const.VALID_DISCONTINUOUS_NAMES
                    and loop.iteration_space == "cell_column"):
                ctrans.apply(loop)

        # Add OpenMP parallel do directives to the loops
        for loop in schedule.walk(Loop):
            try:
                # Make sure reductions are reproducible
                if loop.reductions():
                    ptrans.apply(loop)
                    ltrans.apply(loop, {"reprod": True})
                else:
                    otrans.apply(loop)
            except TransformationError as info:
                print(str(info.value))

        # take a look at what we've done
        schedule.view()

    return psy
Example 25
def test_omp_region_with_children_of_different_types():
    ''' Test that we can generate code if we have an
    OpenMP parallel region enclosing children of different types. '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompl = GOceanOMPLoopTrans()
    ompr = OMPParallelTrans()

    # Put an OpenMP do directive around one loop
    omp_schedule, _ = ompl.apply(schedule.children[1])

    # Now enclose all of the children within a parallel region
    schedule, _ = ompr.apply(omp_schedule.children)

    # Replace the original loop schedule with the transformed one
    invoke.schedule = schedule

    # Attempt to generate the transformed code
    _ = psy.gen
Example 26
def test_omp_parallel():
    ''' Check insertion of an OpenMP parallel region containing a single,
    explicit loop. '''
    from psyclone.transformations import OMPParallelTrans
    otrans = OMPParallelTrans()
    _, invoke_info = parse(os.path.join(BASE_PATH, "explicit_do.f90"),
                           api=API, line_length=False)
    psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
    schedule = psy.invokes.get('explicit_do').schedule
    schedule, _ = otrans.apply([schedule.children[0]])
    gen_code = str(psy.gen).lower()
    assert ("  !$omp parallel default(shared), private(jk,jj,ji)\n"
            "  do jk = 1, jpk\n"
            "    do jj = 1, jpj\n"
            "      do ji = 1, jpi\n"
            "        umask(ji, jj, jk) = ji * jj * jk / r\n"
            "      end do\n"
            "    end do\n"
            "  end do\n"
            "  !$omp end parallel\n" in gen_code)
Example 27
def test_omp_parallel_multi():
    ''' Check insertion of an OpenMP parallel region containing more than
    one node. '''
    from psyclone.transformations import OMPParallelTrans
    from psyclone.psyGen import OMPParallelDirective
    otrans = OMPParallelTrans()
    _, invoke_info = parse(os.path.join(BASE_PATH, "imperfect_nest.f90"),
                           api=API,
                           line_length=False)
    psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
    schedule = psy.invokes.get('imperfect_nest').schedule
    schedule.view()
    # Apply the OMP Parallel transformation so as to enclose the last two
    # loop nests (Python's slice notation is such that the expression below
    # gives elements 2-3).
    new_sched, _ = otrans.apply(schedule[0].loop_body[2:4])
    new_sched.view()
    gen_code = str(psy.gen).lower()
    assert ("    !$omp parallel default(shared), private(ji,jj,zabe1,zcof1,"
            "zmsku)\n"
            "    do jj = 1, jpjm1\n"
            "      do ji = 1, fs_jpim1\n"
            "        zabe1 = pahu(ji, jj, jk) * e2_e1u(ji, jj) * "
            "e3u_n(ji, jj, jk)\n" in gen_code)
    assert ("    do jj = 2, jpjm1\n"
            "      do ji = fs_2, fs_jpim1\n"
            "        pta(ji, jj, jk, jn) = pta(ji, jj, jk, jn) + "
            "zsign * (zftu(ji, jj, jk) - zftu(ji - 1, jj, jk) + "
            "zftv(ji, jj, jk) - zftv(ji, jj - 1, jk)) * r1_e1e2t(ji, jj) / "
            "e3t_n(ji, jj, jk)\n"
            "      end do\n"
            "    end do\n"
            "    !$omp end parallel\n" in gen_code)
    directive = new_sched[0].loop_body[2]
    assert isinstance(directive, OMPParallelDirective)

    # Check that further calls to the update() method don't change the
    # stored AST.
    old_ast = directive.ast
    directive.update()
    assert old_ast is directive.ast
Example 28
def test_omp_parallel_errs():
    ''' Check that we raise the expected errors when incorrectly attempting
    to add an OpenMP parallel region containing more than one node. '''
    from psyclone.transformations import OMPParallelTrans
    otrans = OMPParallelTrans()
    _, invoke_info = parse(os.path.join(BASE_PATH, "imperfect_nest.f90"),
                           api=API, line_length=False)
    psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
    schedule = psy.invokes.get('imperfect_nest').schedule
    schedule.view()
    # Apply the OMP Parallel transformation so as to enclose the last two
    # loop nests (Python's slice notation is such that the expression below
    # gives elements 2-3).
    new_sched, _ = otrans.apply(schedule.children[0].children[2:4])
    directive = new_sched.children[0].children[2]
    # Break the AST by deleting some of it
    _ = new_sched.children[0]._ast.content.remove(directive._children[0]._ast)
    with pytest.raises(InternalError) as err:
        _ = psy.gen
    assert ("Failed to find locations to insert begin/end directives" in
            str(err))
Example 29
def trans(psy):
    ''' PSyclone transformation script for the dynamo0p3 API to apply
    loop fusion and OpenMP for a particular example.'''
    # Imports assumed for a standalone script (LFRicLoopFuseTrans is
    # taken to come from psyclone.domain.lfric.transformations):
    from psyclone.configuration import Config
    from psyclone.domain.lfric.transformations import LFRicLoopFuseTrans
    from psyclone.transformations import (Dynamo0p3OMPLoopTrans,
                                          OMPParallelTrans)

    otrans = OMPParallelTrans()
    ltrans = Dynamo0p3OMPLoopTrans()
    ftrans = LFRicLoopFuseTrans()

    invoke = psy.invokes.invoke_list[0]
    schedule = invoke.schedule

    config = Config.get()
    if config.api_conf("dynamo0.3").compute_annexed_dofs and \
       config.distributed_memory:
        # We can't loop fuse as the loop bounds differ, so instead
        # parallelise each loop individually: an OpenMP do directive
        # (with reproducible reductions) inside its own OpenMP
        # parallel region
        ltrans.apply(schedule.children[0], {"reprod": True})
        otrans.apply(schedule.children[0])
        ltrans.apply(schedule.children[1], {"reprod": True})
        otrans.apply(schedule.children[1])
    else:
        # Loop fuse the two built-in kernels. The 'same_space' flag needs to
        # be set as built-ins are over ANY_SPACE.
        ftrans.apply(schedule[0], schedule[1], {"same_space": True})

        # Add an OpenMP do directive to the resultant loop-fused loop,
        # specifying that we want reproducible reductions
        ltrans.apply(schedule.children[0], {"reprod": True})

        # Add an OpenMP parallel directive around the OpenMP do directive
        otrans.apply(schedule.children[0])

    # take a look at what we've done
    schedule.view()
    schedule.dag()

    return psy
Example 30
def test_omp_region_before_loops_trans():
    ''' Test of the OpenMP PARALLEL region transformation where
    we do the region transformation before the loop
    transformations. '''
    psy, invoke = get_invoke("single_invoke_two_kernels.f90", 0)
    schedule = invoke.schedule

    # Put all of the loops in the schedule within a single
    # OpenMP region
    ompr = OMPParallelTrans()
    omp_schedule, _ = ompr.apply(schedule.children)

    # Put an OpenMP do directive around each loop contained
    # in the region
    ompl = GOceanOMPLoopTrans()
    for child in omp_schedule.children[0].children:
        schedule, _ = ompl.apply(child)
        omp_schedule = schedule

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule

    # Store the results of applying this code transformation as
    # a string
    gen = str(psy.gen)

    # Iterate over the lines of generated code
    omp_region_idx = -1
    omp_do_idx = -1
    for idx, line in enumerate(gen.split('\n')):
        if '!$omp parallel default' in line:
            omp_region_idx = idx
        if '!$omp do' in line:
            omp_do_idx = idx
        if 'DO j=' in line:
            break

    assert omp_region_idx != -1
    assert omp_do_idx != -1
    assert omp_do_idx - omp_region_idx == 1