def test_omp_region_no_slice_no_const_bounds():
    ''' Test that we generate the correct code when we apply an OpenMP
    PARALLEL region transformation to a list of nodes when the Schedule
    has been transformed to use loop-bound look-ups '''

    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule
    ompr = OMPParallelTrans()
    cbtrans = GOConstLoopBoundsTrans()

    newsched, _ = cbtrans.apply(schedule, const_bounds=False)
    omp_schedule, _ = ompr.apply(newsched.children)
    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule
    # Store the results of applying this code transformation as
    # a string
    gen = str(psy.gen)
    gen = gen.lower()
    # Iterate over the lines of generated code
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1
    assert call_count == 3
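

# A minimal sketch (hypothetical; not called by the tests in this file) of how
# the line-scanning check above could be factored into a helper. It assumes
# 'generated_code' is the lower-cased string produced by str(psy.gen).
def _count_calls_in_omp_region(generated_code):
    ''' Count kernel CALL statements that occur between an
    "!$omp parallel" directive and the matching "!$omp end parallel". '''
    within_omp_region = False
    call_count = 0
    for line in generated_code.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1
    return call_count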


def test_const_loop_bounds_invalid_offset():
    ''' Test that we raise an appropriate error if we attempt to generate
    code with constant loop bounds for a kernel that expects an
    unsupported grid-offset '''
    psy, invoke = get_invoke("test26_const_bounds_invalid_offset.f90", 0)
    cbtrans = GOConstLoopBoundsTrans()
    schedule = invoke.schedule
    newsched, _ = cbtrans.apply(schedule, const_bounds=True)
    invoke.schedule = newsched
    with pytest.raises(GenerationError):
        _ = psy.gen


def test_const_loop_bounds_not_schedule():
    ''' Check that we raise an error if we attempt to apply the
    constant loop-bounds transformation to something that is
    not a Schedule '''
    _, invoke = get_invoke("test11_different_iterates_over_"
                           "one_invoke.f90", 0)
    schedule = invoke.schedule
    cbtrans = GOConstLoopBoundsTrans()

    with pytest.raises(TransformationError):
        _, _ = cbtrans.apply(schedule.children[0])


def test_omp_region_retains_kernel_order1():
    ''' Test that applying the OpenMP PARALLEL region transformation
    to a subset of nodes (the last two of three) does not change their
    ordering '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompr = OMPParallelTrans()
    cbtrans = GOConstLoopBoundsTrans()

    omp_schedule, _ = ompr.apply(schedule.children[1:])

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule
    # Store the results of applying this code transformation as
    # a string
    gen = str(psy.gen)
    gen = gen.lower()
    # Iterate over the lines of generated code
    cu_idx = -1
    cv_idx = -1
    ts_idx = -1
    for idx, line in enumerate(gen.split('\n')):
        if 'call compute_cu' in line:
            cu_idx = idx
        if 'call compute_cv' in line:
            cv_idx = idx
        if 'call time_smooth' in line:
            ts_idx = idx

    # Kernels should be in order {compute_cu, compute_cv, time_smooth}
    assert cu_idx < cv_idx < ts_idx

    # Repeat after turning off constant loop bounds
    newsched, _ = cbtrans.apply(omp_schedule, const_bounds=False)
    invoke.schedule = newsched
    gen = str(psy.gen)
    gen = gen.lower()
    # Iterate over the lines of generated code
    cu_idx = -1
    cv_idx = -1
    ts_idx = -1
    for idx, line in enumerate(gen.split('\n')):
        if 'call compute_cu' in line:
            cu_idx = idx
        if 'call compute_cv' in line:
            cv_idx = idx
        if 'call time_smooth' in line:
            ts_idx = idx

    # Kernels should be in order {compute_cu, compute_cv, time_smooth}
    assert cu_idx < cv_idx < ts_idx
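

# A minimal sketch (hypothetical; not used by these tests) of how the
# kernel-ordering check above could be written as a reusable helper: return
# the index of the first generated line containing 'substring', or -1 if it
# never appears.
def _first_line_containing(generated_code, substring):
    ''' Return the index of the first line of the (lower-cased) generated
    code that contains 'substring', or -1 if it does not appear. '''
    for idx, line in enumerate(generated_code.split('\n')):
        if substring in line:
            return idx
    return -1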


def test_omp_region_with_single_loop():
    ''' Test that we can pass a single node in a schedule to the OpenMP
    PARALLEL region transformation '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    ompr = OMPParallelTrans()
    cbtrans = GOConstLoopBoundsTrans()

    omp_schedule, _ = ompr.apply(schedule.children[1])

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule
    # Store the results of applying this code transformation as
    # a string
    gen = str(psy.gen)
    gen = gen.lower()

    # Iterate over the lines of generated code
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1

    assert call_count == 1

    # Repeat the test after turning off constant loop bounds
    newsched, _ = cbtrans.apply(omp_schedule, const_bounds=False)
    invoke.schedule = newsched
    gen = str(psy.gen)
    gen = gen.lower()
    within_omp_region = False
    call_count = 0
    for line in gen.split('\n'):
        if '!$omp parallel default' in line:
            within_omp_region = True
        if '!$omp end parallel' in line:
            within_omp_region = False
        if ' call ' in line and within_omp_region:
            call_count += 1

    assert call_count == 1


def test_loop_fuse_different_iterates_over():
    ''' Test that an appropriate error is raised when we attempt to
    fuse two loops that have differing values of ITERATES_OVER '''
    _, invoke = get_invoke("test11_different_iterates_over_"
                           "one_invoke.f90", 0)
    schedule = invoke.schedule
    lftrans = LoopFuseTrans()
    cbtrans = GOConstLoopBoundsTrans()

    # Attempt to fuse two loops that are iterating over different
    # things
    with pytest.raises(TransformationError):
        _, _ = lftrans.apply(schedule.children[0], schedule.children[1])

    # Turn off constant loop bounds (which should have no effect)
    # and repeat
    newsched, _ = cbtrans.apply(schedule, const_bounds=False)
    with pytest.raises(TransformationError):
        _, _ = lftrans.apply(newsched.children[0], newsched.children[1])


def test_const_loop_bounds_toggle():
    ''' Check that we can toggle constant loop bounds on and off and
    that the default behaviour is "on" '''
    psy, invoke = get_invoke(
        "test11_different_iterates_over_"
        "one_invoke.f90", 0)
    schedule = invoke.schedule
    cbtrans = GOConstLoopBoundsTrans()

    # First check that the generated code uses constant loop
    # bounds by default
    gen = str(psy.gen)

    assert "INTEGER istop, jstop" in gen
    assert "istop = cv_fld%grid%simulation_domain%xstop" in gen
    assert "jstop = cv_fld%grid%simulation_domain%ystop" in gen
    assert "DO j=2,jstop-1" in gen
    assert "DO i=2,istop" in gen

    # Next, check that applying the constant loop-bounds
    # transformation has no effect (in this case)
    newsched, _ = cbtrans.apply(schedule)
    invoke.schedule = newsched
    # Store the generated code as a string
    gen = str(psy.gen)

    assert "INTEGER istop, jstop" in gen
    assert "istop = cv_fld%grid%simulation_domain%xstop" in gen
    assert "jstop = cv_fld%grid%simulation_domain%ystop" in gen
    assert "DO j=2,jstop-1" in gen
    assert "DO i=2,istop" in gen

    # Finally, test that we can turn off constant loop bounds
    newsched, _ = cbtrans.apply(schedule, const_bounds=False)
    invoke.schedule = newsched
    # Store the generated code as a string
    gen = str(psy.gen)

    assert "DO j=cv_fld%internal%ystart,cv_fld%internal%ystop" in gen
    assert "DO i=cv_fld%internal%xstart,cv_fld%internal%xstop" in gen
    assert "DO j=p_fld%whole%ystart,p_fld%whole%ystop" in gen
    assert "DO i=p_fld%whole%xstart,p_fld%whole%xstop" in gen


def test_omp_region_commutes_with_loop_trans_bounds_lookup():
    ''' Test that the OpenMP PARALLEL region and (orphan) loop
    transformations commute after constant bounds have been
    switched off - i.e. we get the same result
    independent of the order in which they are applied. '''
    psy, invoke = get_invoke("single_invoke_two_kernels.f90", 0)
    schedule = invoke.schedule
    # Turn off constant loop bounds
    cbtrans = GOConstLoopBoundsTrans()
    newsched, _ = cbtrans.apply(schedule, const_bounds=False)

    # Put an OpenMP do directive around each loop contained
    # in the schedule
    ompl = GOceanOMPLoopTrans()
    for child in newsched.children:
        omp_schedule, _ = ompl.apply(child)

    # Now put an OpenMP parallel region around that set of
    # loops
    ompr = OMPParallelTrans()
    schedule, _ = ompr.apply(omp_schedule.children)

    # Replace the original loop schedule with the transformed one
    invoke.schedule = schedule

    # Store the results of applying this code transformation as
    # a string
    loop_before_region_gen = str(psy.gen)

    # Now we do it again but in the opposite order...
    # ...we re-generate the original schedule here rather than
    # keeping a (deep) copy of it from earlier as that can
    # cause resource problems.
    psy, invoke = get_invoke("single_invoke_two_kernels.f90", 0)
    schedule = invoke.schedule
    # Turn off constant loop bounds
    cbtrans = GOConstLoopBoundsTrans()
    schedule, _ = cbtrans.apply(schedule, const_bounds=False)

    # Put all of the loops in the schedule within a single
    # OpenMP region
    ompr = OMPParallelTrans()
    omp_schedule, _ = ompr.apply(schedule.children)

    # Put an OpenMP do directive around each loop contained
    # in the region
    ompl = GOceanOMPLoopTrans()
    for child in omp_schedule.children[0].children:
        schedule, _ = ompl.apply(child)
        omp_schedule = schedule

    # Replace the original loop schedule with the transformed one
    invoke.schedule = omp_schedule

    # Store the results of applying this code transformation as
    # a string
    region_before_loop_gen = str(psy.gen)

    assert region_before_loop_gen == loop_before_region_gen


def test_omp_parallel_loop():
    '''Test that we can generate an OMP PARALLEL DO correctly,
    independent of whether or not we are generating constant loop bounds '''
    psy, invoke = get_invoke("single_invoke_three_kernels.f90", 0)
    schedule = invoke.schedule

    omp = GOceanOMPParallelLoopTrans()
    cbtrans = GOConstLoopBoundsTrans()
    omp_sched, _ = omp.apply(schedule.children[0])

    invoke.schedule = omp_sched
    gen = str(psy.gen)
    gen = gen.lower()
    expected = ("!$omp parallel do default(shared), private(j,i), "
                "schedule(static)\n"
                "      do j=2,jstop\n"
                "        do i=2,istop+1\n"
                "          call compute_cu_code(i, j, cu_fld%data, "
                "p_fld%data, u_fld%data)\n"
                "        end do \n"
                "      end do \n"
                "      !$omp end parallel do")
    assert expected in gen

    newsched, _ = cbtrans.apply(omp_sched, const_bounds=False)
    invoke.schedule = newsched
    gen = str(psy.gen)
    gen = gen.lower()
    expected = (
        "      !$omp parallel do default(shared), private(j,i), "
        "schedule(static)\n"
        "      do j=cu_fld%internal%ystart,cu_fld%internal%ystop\n"
        "        do i=cu_fld%internal%xstart,cu_fld%internal%xstop\n"
        "          call compute_cu_code(i, j, cu_fld%data, p_fld%data, "
        "u_fld%data)\n"
        "        end do \n"
        "      end do \n"
        "      !$omp end parallel do")
    assert expected in gen


def test_driver_creation(tmpdir):
    '''Test that the driver is created correctly for all variable access \
    modes (input, input-output, output).

    '''
    # Use tmpdir so that the driver is created in tmp
    tmpdir.chdir()

    etrans = GOceanExtractTrans()
    ctrans = GOConstLoopBoundsTrans()
    psy, invoke = get_invoke("driver_test.f90",
                             GOCEAN_API,
                             idx=0,
                             dist_mem=False)
    schedule = invoke.schedule
    # This test expects constant loop bounds
    ctrans.apply(schedule)

    etrans.apply(schedule.children[0], {'create_driver': True})
    # We are only interested in the driver, so ignore results.
    str(psy.gen)

    driver = tmpdir.join("driver-psy_extract_example_with_various_variable_"
                         "access_patterns-invoke_0_compute_kernel:compute_"
                         "kernel_code:r0.f90")
    assert driver.isfile()

    with driver.open("r") as driver_file:
        driver_code = driver_file.read()

    # This is an excerpt of the code that should get created.
    # It is tested line by line since there is other code in between
    # which is not important, and the order might also change. It also
    # tests if unique variable names are created in the driver: the user
    # program contains a local variable 'dx', which clashes with the grid
    # property dx. The grid property will be renamed to 'dx_1':
    expected = '''USE extract_psy_data_mod, ONLY: extract_PSyDataType
      IMPLICIT NONE
      REAL(KIND=8), allocatable, dimension(:,:) :: gphiu
      REAL(KIND=8), allocatable, dimension(:,:) :: out_fld
      REAL(KIND=8), allocatable, dimension(:,:) :: out_fld_post
      REAL(KIND=8), allocatable, dimension(:,:) :: in_fld
      REAL(KIND=8), allocatable, dimension(:,:) :: dx
      REAL(KIND=8), allocatable, dimension(:,:) :: dx_1
      REAL(KIND=8), allocatable, dimension(:,:) :: in_out_fld
      REAL(KIND=8), allocatable, dimension(:,:) :: in_out_fld_post

      TYPE(extract_PSyDataType) extract_psy_data
      CALL extract_psy_data%OpenRead("psy_extract_example_with_various_variable''' \
      '''_access_patterns", "invoke_0_compute_kernel:compute_kernel_code:r0")
      CALL extract_psy_data%ReadVariable("out_fld_post", out_fld_post)
      ALLOCATE (out_fld, mold=out_fld_post)
      out_fld = 0.0
      CALL extract_psy_data%ReadVariable("in_fld", in_fld)
      CALL extract_psy_data%ReadVariable("in_out_fld_post", in_out_fld_post)
      CALL extract_psy_data%ReadVariable("dx", dx)
      CALL extract_psy_data%ReadVariable("in_fld%grid%dx", dx_1)
      CALL extract_psy_data%ReadVariable("in_fld%grid%gphiu", gphiu)
      ! RegionStart
      DO j=2,jstop
        DO i=2,istop+1
          CALL compute_kernel_code(i, j, out_fld, in_out_fld, in_fld, ''' \
      '''dx, dx_1, gphiu)
        END DO
      END DO
      ! RegionEnd
      !
      ! Check out_fld
      ! Check i
      ! Check j
      ! Check in_out_fld'''
    expected_lines = expected.split("\n")
    for line in expected_lines:
        assert line in driver_code
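

# A minimal sketch (hypothetical) of the line-by-line assertion pattern used
# above; it assumes both arguments are plain multi-line strings and makes no
# claim about the ordering of the expected lines in the actual code.
def _assert_each_line_present(expected, actual):
    ''' Check that every line of the expected excerpt appears somewhere in
    the actual generated code. '''
    for line in expected.split("\n"):
        assert line in actual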


def test_node_list_ompparallel_gocean1p0():
    ''' Test that applying Extract Transformation on a list of Nodes
    enclosed within an OMP Parallel Region produces the correct result
    in the GOcean1.0 API. '''

    etrans = GOceanExtractTrans()
    ltrans = GOceanOMPLoopTrans()
    otrans = OMPParallelTrans()
    ctrans = GOConstLoopBoundsTrans()

    # Test a list of Loops enclosed within an OMP Parallel Region
    psy, invoke = get_invoke("single_invoke_three_kernels.f90",
                             GOCEAN_API,
                             idx=0,
                             dist_mem=False)
    schedule = invoke.schedule

    # Apply GOConstLoopBoundsTrans
    ctrans.apply(schedule)
    # Apply GOceanOMPLoopTrans to the first two Loops
    ltrans.apply(schedule.children[0])
    ltrans.apply(schedule.children[1])
    # and enclose them within a parallel region
    otrans.apply(schedule.children[0:2])
    # Now enclose the parallel region within an ExtractNode (inserted
    # at the previous location of the OMPParallelDirective)
    etrans.apply(schedule.children[0])

    code = str(psy.gen)
    output = """
      ! ExtractStart
      !
      CALL extract_psy_data%PreStart("psy_single_invoke_three_kernels", """ \
      """"invoke_0:r0", 3, 4)
      CALL extract_psy_data%PreDeclareVariable("p_fld", p_fld)
      CALL extract_psy_data%PreDeclareVariable("u_fld", u_fld)
      CALL extract_psy_data%PreDeclareVariable("v_fld", v_fld)
      CALL extract_psy_data%PreDeclareVariable("cu_fld_post", cu_fld)
      CALL extract_psy_data%PreDeclareVariable("cv_fld_post", cv_fld)
      CALL extract_psy_data%PreDeclareVariable("i_post", i)
      CALL extract_psy_data%PreDeclareVariable("j_post", j)
      CALL extract_psy_data%PreEndDeclaration
      CALL extract_psy_data%ProvideVariable("p_fld", p_fld)
      CALL extract_psy_data%ProvideVariable("u_fld", u_fld)
      CALL extract_psy_data%ProvideVariable("v_fld", v_fld)
      CALL extract_psy_data%PreEnd
      !$omp parallel default(shared), private(i,j)
      !$omp do schedule(static)
      DO j=2,jstop
        DO i=2,istop+1
          CALL compute_cu_code(i, j, cu_fld%data, p_fld%data, u_fld%data)
        END DO
      END DO
      !$omp end do
      !$omp do schedule(static)
      DO j=2,jstop+1
        DO i=2,istop
          CALL compute_cv_code(i, j, cv_fld%data, p_fld%data, v_fld%data)
        END DO
      END DO
      !$omp end do
      !$omp end parallel
      CALL extract_psy_data%PostStart
      CALL extract_psy_data%ProvideVariable("cu_fld_post", cu_fld)
      CALL extract_psy_data%ProvideVariable("cv_fld_post", cv_fld)
      CALL extract_psy_data%ProvideVariable("i_post", i)
      CALL extract_psy_data%ProvideVariable("j_post", j)
      CALL extract_psy_data%PostEnd
      !
      ! ExtractEnd"""
    assert output in code


def test_single_node_ompparalleldo_gocean1p0_failing_const_loop():
    ''' Test that applying Extract Transformation on a Node enclosed
    within an OMP Parallel DO Directive produces the correct result
    in the GOcean1.0 API. This test is mostly identical to the previous one,
    but uses const loop bounds. At this stage, the dependency analysis
    still reports `cv_fld%internal%xstart` etc as loop boundaries, but
    the code created will be using istop and jstop.

    '''
    etrans = GOceanExtractTrans()
    otrans = GOceanOMPParallelLoopTrans()
    ctrans = GOConstLoopBoundsTrans()

    # Test a Loop nested within the OMP Parallel DO Directive
    psy, invoke = get_invoke("single_invoke_three_kernels.f90",
                             GOCEAN_API,
                             idx=0,
                             dist_mem=False)
    schedule = invoke.schedule
    ctrans.apply(schedule)
    # Required for #969
    schedule.view()

    # Apply GOceanOMPParallelLoopTrans to the second Loop
    otrans.apply(schedule.children[1])

    # Now enclose the parallel region within an ExtractNode (inserted
    # at the previous location of the OMPParallelDoDirective)
    etrans.apply(schedule.children[1])

    code = str(psy.gen)
    output = """      ! ExtractStart
      !
      CALL extract_psy_data%PreStart("psy_single_invoke_three_kernels", """ \
      """"invoke_0:compute_cv_code:r0", 6, 3)
      CALL extract_psy_data%PreDeclareVariable("istop", istop)
      CALL extract_psy_data%PreDeclareVariable("jstop", jstop)
      CALL extract_psy_data%PreDeclareVariable("p_fld", p_fld)
      CALL extract_psy_data%PreDeclareVariable("v_fld", v_fld)
      CALL extract_psy_data%PreDeclareVariable("cv_fld_post", cv_fld)
      CALL extract_psy_data%PreDeclareVariable("i_post", i)
      CALL extract_psy_data%PreDeclareVariable("j_post", j)
      CALL extract_psy_data%PreEndDeclaration
      CALL extract_psy_data%ProvideVariable("istop", istop)
      CALL extract_psy_data%ProvideVariable("jstop", jstop)
      CALL extract_psy_data%ProvideVariable("p_fld", p_fld)
      CALL extract_psy_data%ProvideVariable("v_fld", v_fld)
      CALL extract_psy_data%PreEnd
      !$omp parallel do default(shared), private(i,j), schedule(static)
      DO j=2,jstop+1
        DO i=2,istop
          CALL compute_cv_code(i, j, cv_fld%data, p_fld%data, v_fld%data)
        END DO
      END DO
      !$omp end parallel do
      CALL extract_psy_data%PostStart
      CALL extract_psy_data%ProvideVariable("cv_fld_post", cv_fld)
      CALL extract_psy_data%ProvideVariable("i_post", i)
      CALL extract_psy_data%ProvideVariable("j_post", j)
      CALL extract_psy_data%PostEnd
      !
      ! ExtractEnd"""
    assert output in code