def trans(psy):
    '''
    Apply the 'OMPParallelLoopTrans' transformation to every loop of type
    "levels" that contains at least one NemoKern within the Schedule of the
    'tra_ldf_iso' invoke.

    :param psy: the object holding all information on the PSy layer
                to be modified.
    :type psy: :py:class:`psyclone.psyGen.PSy`

    :returns: the transformed PSy object
    :rtype: :py:class:`psyclone.psyGen.PSy`

    '''
    from psyclone.psyGen import TransInfo
    from psyclone.nemo import NemoKern

    # The Schedule of the routine we are targeting
    sched = psy.invokes.get('tra_ldf_iso').schedule

    # Look up the transformation to be applied
    omp_loop_trans = TransInfo().get_trans_name('OMPParallelLoopTrans')

    for loop in sched.loops():
        # TODO loop.kernel method needs extending to cope with
        # multiple kernels
        if not loop.walk(NemoKern):
            # Nothing to parallelise in a loop without kernels
            continue
        if loop.loop_type != "levels":
            continue
        sched, _ = omp_loop_trans.apply(loop)

    # Store the (possibly new) Schedule back on the invoke
    psy.invokes.get('tra_ldf_iso').schedule = sched

    return psy
def test_explicit_loop(parser):
    ''' Apply ACCLoopTrans to two explicit loops, the second with
    independent=False, and check the directives in the generated code. '''
    source = ("program do_loop\n"
              "real :: sto_tmp(jpj), sto_tmp2(jpj)\n"
              "do ji = 1,jpj\n"
              " sto_tmp(ji) = 1.0d0\n"
              "end do\n"
              "do ji = 1,jpj\n"
              " sto_tmp2(ji) = 1.0d0\n"
              "end do\n"
              "end program do_loop\n")
    ptree = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    loop_trans = TransInfo().get_trans_name('ACCLoopTrans')
    # First loop gets the default clause, second has 'independent' disabled
    sched, _ = loop_trans.apply(sched.children[0])
    sched, _ = loop_trans.apply(sched.children[1], independent=False)
    generated = str(psy.gen)
    expected = ("PROGRAM do_loop\n"
                " REAL :: sto_tmp(jpj), sto_tmp2(jpj)\n"
                " !$ACC LOOP INDEPENDENT\n"
                " DO ji = 1, jpj\n"
                " sto_tmp(ji) = 1.0D0\n"
                " END DO\n"
                " !$ACC LOOP\n"
                " DO ji = 1, jpj\n"
                " sto_tmp2(ji) = 1.0D0\n"
                " END DO\n"
                "END PROGRAM do_loop")
    assert expected in generated
def trans(psy):
    '''
    Transformation script entry point (for use with the -s option of
    PSyclone's generate()). Applies 'GOceanOMPLoopTrans' to each child of
    the 'invoke_0' Schedule and then encloses all of the children within
    a single region created by 'OMPParallelTrans'.

    :param psy: the PSy object whose 'invoke_0' Schedule is transformed.

    :returns: the same PSy object, with the new Schedule stored on \
'invoke_0'.

    '''
    from psyclone.psyGen import TransInfo

    trans_info = TransInfo()
    omp_loop_trans = trans_info.get_trans_name('GOceanOMPLoopTrans')
    omp_region_trans = trans_info.get_trans_name('OMPParallelTrans')

    invoke_name = 'invoke_0'
    schedule = psy.invokes.get(invoke_name).schedule

    # Give *every* top-level node of the schedule an OpenMP loop directive
    for node in schedule.children:
        updated, _ = omp_loop_trans.apply(node)
        schedule = updated

    # Wrap everything in one OpenMP PARALLEL region
    updated, _ = omp_region_trans.apply(schedule.children)

    psy.invokes.get(invoke_name).schedule = updated
    return psy
def test_omp_private_declaration():
    ''' Check the private/shared clause generated when an OMP parallel
    region encloses a scalar assignment followed by a loop that uses that
    scalar as its upper bound, i.e. code of the form:

        !$omp parallel default(shared), private()
        jpk = 100
        do k=1, jpk ...
        enddo
        !$omp end parallel
        do k=1, jpk ...

    Here jpk must not be declared private because it is used again by the
    loop that follows the parallel region. '''
    psy, invoke_info = get_invoke("explicit_do_two_loops.f90", api=API,
                                  idx=0)
    sched = invoke_info.schedule
    region_trans = TransInfo().get_trans_name('OMPParallelTrans')

    # The region covers the assignment and the loop that follows it.
    # Enclosing an assignment is rejected by default so the node-type
    # check has to be switched off for this call.
    first_two_nodes = sched.children[0:2]
    region_trans.apply(first_two_nodes, {'node-type-check': False})

    generated = str(psy.gen).lower()
    assert "!$omp parallel default(shared), private(ji,jj,jk)" in generated
def test_parallel_if_block(parser):
    ''' Enclose a complete IF-block in an ACC parallel region and check
    where the directives appear in the generated code. '''
    source = ("program do_loop\n"
              "integer :: ji\n"
              "integer, parameter :: jpi=64\n"
              "logical :: init\n"
              "real :: sto_tmp(jpi), sto_tmp2(jpi)\n"
              "if(init)then\n"
              " do ji = 1,jpi\n"
              " sto_tmp(ji) = 1.0d0\n"
              " end do\n"
              "else\n"
              " do ji = 1,jpi\n"
              " sto_tmp2(ji) = 1.0d0\n"
              " end do\n"
              "end if\n"
              "end program do_loop\n")
    ptree = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    parallel_trans = TransInfo().get_trans_name('ACCParallelTrans')
    sched, _ = parallel_trans.apply(sched.children[0:1])
    generated = str(psy.gen)
    # Start of the region precedes the IF
    region_start = (" !$ACC PARALLEL\n"
                    " IF (init) THEN\n"
                    " DO ji = 1, jpi\n")
    assert region_start in generated
    # End of the region follows the END IF
    region_end = (" END DO\n"
                  " END IF\n"
                  " !$ACC END PARALLEL\n")
    assert region_end in generated
def test_omp_explicit_gen():
    ''' Apply OMPParallelLoopTrans to the "levels" loop of explicit_do.f90
    and check the generated code, including that generating the code
    twice gives the same result. '''
    psy, invoke_info = get_invoke("explicit_do.f90", api=API, idx=0)
    sched = invoke_info.schedule
    omp_loop_trans = TransInfo().get_trans_name('OMPParallelLoopTrans')

    for loop in sched.loops():
        if not loop.kernel:
            continue
        if loop.loop_type != "levels":
            continue
        omp_loop_trans.apply(loop)

    expected = (
        "program explicit_do\n"
        " implicit none\n"
        " integer :: ji, jj, jk\n"
        " integer, parameter :: jpi = 2, jpj = 4, jpk = 6\n"
        " real :: r\n"
        " real, dimension(jpi, jpj, jpk) :: umask\n"
        " !$omp parallel do default(shared), private(ji,jj,jk), "
        "schedule(static)\n"
        " do jk = 1, jpk\n"
        " do jj = 1, jpj\n"
        " do ji = 1, jpi\n"
        " umask(ji, jj, jk) = ji * jj * jk / r\n"
        " end do\n"
        " end do\n"
        " end do\n"
        " !$omp end parallel do\n"
        "end program explicit_do")

    # Generate twice: the second pass must produce the same code
    for _ in range(2):
        generated = str(psy.gen).lower()
        assert expected in generated
def trans(psy):
    '''
    Apply the 'OMPParallelLoopTrans' transformation to every loop of type
    "levels" that contains at least one NemoKern, in the Schedule of every
    invoke held by the supplied PSy object.

    :param psy: the object holding all information on the PSy layer
                to be modified.
    :type psy: :py:class:`psyclone.psyGen.PSy`

    :returns: the transformed PSy object
    :rtype: :py:class:`psyclone.psyGen.PSy`

    '''
    from psyclone.psyGen import TransInfo
    from psyclone.nemo import NemoKern

    # Look up the transformation once, it is re-used for every invoke
    omp_loop_trans = TransInfo().get_trans_name('OMPParallelLoopTrans')

    for invoke in psy.invokes.invoke_list:
        for loop in invoke.schedule.loops():
            # Only loops over levels that enclose a kernel are transformed
            if not loop.walk(NemoKern):
                continue
            if loop.loop_type != "levels":
                continue
            omp_loop_trans.apply(loop)

    return psy
def test_parallel_two_loops(parser):
    ''' Enclose two consecutive loops in a single ACC parallel region and
    check the generated code. '''
    source = ("program do_loop\n"
              "integer :: ji\n"
              "integer, parameter :: jpi=11\n"
              "real :: sto_tmp(jpi), sto_tmp2(jpi)\n"
              "do ji = 1,jpi\n"
              " sto_tmp(ji) = 1.0d0\n"
              "end do\n"
              "do ji = 1,jpi\n"
              " sto_tmp2(ji) = 1.0d0\n"
              "end do\n"
              "end program do_loop\n")
    ptree = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    parallel_trans = TransInfo().get_trans_name('ACCParallelTrans')
    # The schedule is sliced directly to pick out both loops
    sched, _ = parallel_trans.apply(sched[0:2])
    generated = str(psy.gen)
    expected = ("PROGRAM do_loop\n"
                " INTEGER :: ji\n"
                " INTEGER, PARAMETER :: jpi = 11\n"
                " REAL :: sto_tmp(jpi), sto_tmp2(jpi)\n"
                " !$ACC PARALLEL\n"
                " DO ji = 1, jpi\n"
                " sto_tmp(ji) = 1.0D0\n"
                " END DO\n"
                " DO ji = 1, jpi\n"
                " sto_tmp2(ji) = 1.0D0\n"
                " END DO\n"
                " !$ACC END PARALLEL\n"
                "END PROGRAM do_loop")
    assert expected in generated
def test_no_code_blocks(parser):
    ''' Check that ACCDataTrans raises a TransformationError when asked to
    enclose either of two loops that contain a CodeBlock (a READ and a
    WRITE statement respectively). '''
    source = ("program write_out\n"
              " integer :: ji, jpj\n"
              "real(kind=wp) :: sto_tmp(5)\n"
              "do ji = 1,jpj\n"
              "read(*,*) sto_tmp(ji)\n"
              "end do\n"
              "do ji = 1,jpj\n"
              "write(*,*) sto_tmp(ji)\n"
              "end do\n"
              "end program write_out\n")
    ptree = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    data_trans = TransInfo().get_trans_name('ACCDataTrans')
    # Try each loop in turn; both must be rejected with the same message
    for idx in (0, 1):
        with pytest.raises(TransformationError) as err:
            _, _ = data_trans.apply(sched.children[idx:idx + 1])
        assert ("'CodeBlock' cannot be enclosed by a ACCDataTrans" in
                str(err.value))
def test_implicit_loop_sched2():
    ''' Apply NemoExplicitLoopTrans twice to the implicit loop that sits
    inside an explicit loop over levels and check both the resulting
    schedule and the generated code. '''
    source_file = os.path.join(BASE_PATH, "explicit_over_implicit.f90")
    _, invoke_info = parse(source_file, api=API, line_length=False)
    psy = PSyFactory(API).create(invoke_info)
    explicit_trans = TransInfo().get_trans_name('NemoExplicitLoopTrans')
    sched = psy.invokes.invoke_list[0].schedule
    levels_loop = sched.children[0]

    # First application of the transformation
    _, _ = explicit_trans.apply(levels_loop.children[0])
    # Three loops are expected at this point: the explicit one over
    # levels plus the two obtained from the implicit loops over ji and jj.
    all_loops = sched.walk(nemo.NemoLoop)
    assert len(all_loops) == 3
    assert levels_loop.children[0].loop_type == "lat"
    kernel_calls = sched.kern_calls()
    assert not kernel_calls

    # Second application, one level further down
    _, _ = explicit_trans.apply(levels_loop.children[0].children[0])
    generated = str(psy.gen)
    expected = (" INTEGER :: jj\n"
                " INTEGER :: ji\n"
                " DO jk = 1, jpk\n"
                " DO jj = 1, jpj, 1\n"
                " DO ji = 1, jpi, 1\n"
                " umask(ji, jj, jk) = vmask(ji, jj, jk) + 1.0\n"
                " END DO\n"
                " END DO\n"
                " END DO\n"
                "END PROGRAM explicit_over_implicit")
    assert expected in generated
    # The loop variable must only be declared once
    assert generated.count("INTEGER :: ji") == 1
def test_multikern_if(parser):
    ''' Enclose an if-block holding a loop in each branch within an ACC
    kernels region and check the generated code. '''
    source = ("program implicit_loop\n"
              "real(kind=wp) :: sto_tmp(5)\n"
              "if(do_this)then\n"
              "do jk = 1, 3\n"
              " sto_tmp(jk) = jk\n"
              "end do\n"
              "else\n"
              "do jk = 1, 5\n"
              " sto_tmp(jk) = jk\n"
              "end do\n"
              "end if\n"
              "end program implicit_loop\n")
    ptree = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = TransInfo().get_trans_name('ACCKernelsTrans')
    sched, _ = kernels_trans.apply(sched.children[0:1], default_present=True)
    generated = str(psy.gen).lower()
    region_start = ("!$acc kernels default(present)\n"
                    " if (do_this) then\n"
                    " do jk = 1, 3\n")
    assert region_start in generated
    region_end = (" end do\n"
                  " end if\n"
                  " !$acc end kernels\n"
                  "end program implicit_loop")
    assert region_end in generated
def test_kernels_within_if(parser):
    ''' Create separate ACC kernels regions inside the if-body and the
    else-body of an if block and check the generated code. '''
    source = ("program if_then\n"
              "if(do_this)then\n"
              " do ji=1,jpi\n"
              " fld(ji) = 1.0\n"
              " end do\n"
              "else\n"
              " fld2d(:,:) = 0.0\n"
              "end if\n"
              "end program if_then\n")
    ptree = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = TransInfo().get_trans_name('ACCKernelsTrans')

    # One region for each branch of the if block
    sched, _ = kernels_trans.apply(sched.children[0].if_body,
                                   default_present=True)
    sched, _ = kernels_trans.apply(sched.children[0].else_body,
                                   default_present=True)

    generated = str(psy.gen)
    if_branch = (" IF (do_this) THEN\n"
                 " !$ACC KERNELS DEFAULT(PRESENT)\n"
                 " DO ji = 1, jpi\n")
    assert if_branch in generated
    else_branch = (" END DO\n"
                   " !$ACC END KERNELS\n"
                   " ELSE\n"
                   " !$ACC KERNELS DEFAULT(PRESENT)\n"
                   " fld2d(:, :) = 0.0\n"
                   " !$ACC END KERNELS\n"
                   " END IF\n")
    assert else_branch in generated
def test_missed_array_case(parser):
    ''' Check that generating code raises the expected InternalError when
    the internal sanity check finds an array access that was missed.
    TODO #309 - remove this test. '''
    source = ("program do_bound\n"
              " integer :: ice_mask(8,8)\n"
              " real(kind=wp) :: trim_width(8), zdta(8,8)\n"
              " integer :: ji, jj, dom\n"
              " do jj = 1, trim_width(dom)\n"
              " do ji = 1, 8\n"
              " select case(ice_mask(ji,jj))\n"
              " case(0)\n"
              " zdta(ji,jj) = 1.0\n"
              " case(1)\n"
              " zdta(ji,jj) = 0.0\n"
              " end select\n"
              " end do\n"
              " end do\n"
              "end program do_bound\n")
    ptree = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.get('do_bound').schedule
    data_trans = TransInfo().get_trans_name('ACCDataTrans')
    # Enclose all of the schedule's children in a data region
    data_trans.apply(sched.children)
    with pytest.raises(InternalError) as err:
        _ = str(psy.gen)
    expected_msg = ("ArrayReference 'ice_mask' present in source code ("
                    "'ice_mask(ji, jj)') but not identified")
    assert expected_msg in str(err.value)
def test_replicated_loop(parser, tmpdir):
    ''' Put two structurally identical array assignments in separate ACC
    data regions and check that the generated code is as expected and
    compiles. '''
    source = ("subroutine replicate()\n"
              " INTEGER :: dummy\n"
              " REAL :: zwx(10,10)\n"
              " zwx(:,:) = 0.e0\n"
              " zwx(:,:) = 0.e0\n"
              "END subroutine replicate\n")
    ptree = parser(FortranStringReader(source))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.get('replicate').schedule
    data_trans = TransInfo().get_trans_name('ACCDataTrans')
    sched, _ = data_trans.apply(sched.children[0:1])
    sched, _ = data_trans.apply(sched.children[1:2])
    generated = str(psy.gen)

    # Both statements produce the same data region, one after the other
    one_region = (" !$ACC DATA COPYOUT(zwx)\n"
                  " zwx(:, :) = 0.E0\n"
                  " !$ACC END DATA")
    assert one_region + "\n" + one_region in generated
    assert Compile(tmpdir).string_compiles(generated)
def test_omp_explicit_gen():
    ''' Apply OMPParallelLoopTrans to the "levels" loop of explicit_do.f90
    and check the generated code, including that generating the code
    twice gives the same result. '''
    source_file = os.path.join(BASE_PATH, "explicit_do.f90")
    _, invoke_info = parse(source_file, api=API, line_length=False)
    psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
    sched = psy.invokes.get('explicit_do').schedule
    omp_loop_trans = TransInfo().get_trans_name('OMPParallelLoopTrans')

    for loop in sched.loops():
        if not loop.kernel:
            continue
        if loop.loop_type != "levels":
            continue
        sched, _ = omp_loop_trans.apply(loop)

    expected = (
        "program explicit_do\n"
        " implicit none\n"
        " integer :: ji, jj, jk\n"
        " integer :: jpi, jpj, jpk\n"
        " real, dimension(jpi, jpj, jpk) :: umask\n"
        " !$omp parallel do default(shared), private(ji,jj,jk), "
        "schedule(static)\n"
        " do jk = 1, jpk\n"
        " do jj = 1, jpj\n"
        " do ji = 1, jpi\n"
        " umask(ji, jj, jk) = ji * jj * jk / r\n"
        " end do\n"
        " end do\n"
        " end do\n"
        " !$omp end parallel do\n"
        "end program explicit_do")

    # Generate twice: the second pass must produce the same code
    for _ in range(2):
        generated = str(psy.gen).lower()
        assert expected in generated
def test_kernels_dag_name(parser):
    ''' Check the dag_name of the node created by applying ACCKernelsTrans
    to the first two children of the schedule. '''
    ptree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = TransInfo().get_trans_name('ACCKernelsTrans')
    sched, _ = kernels_trans.apply(sched.children[0:2], default_present=True)
    directive = sched.children[0]
    assert directive.dag_name == "ACC_kernels_1"
def test_array_section():
    ''' Check the clauses of the ACC DATA directive generated for the
    code in array_section.f90. '''
    psy, invoke_info = get_invoke("array_section.f90", api=API, idx=0)
    sched = invoke_info.schedule
    data_trans = TransInfo().get_trans_name('ACCDataTrans')
    # The data region spans the whole schedule
    sched, _ = data_trans.apply(sched.children)
    generated = str(psy.gen)
    assert "!$ACC DATA COPYIN(b,c) COPYOUT(a)" in generated
def test_kernels_view(parser, capsys):
    ''' Check that the output of view() on a schedule containing an ACC
    kernels region includes the text for the directive. '''
    ptree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = TransInfo().get_trans_name('ACCKernelsTrans')
    sched, _ = kernels_trans.apply(sched.children[0:2], default_present=True)
    sched.view()
    # Only what was written to stdout is of interest
    captured_out, _ = capsys.readouterr()
    assert "[ACC Kernels]" in captured_out
def test_data_ref():
    ''' Check the clauses of the ACC DATA directive generated for the
    code in data_ref.f90. '''
    psy, invoke_info = get_invoke("data_ref.f90", api=API, idx=0)
    sched = invoke_info.schedule
    data_trans = TransInfo().get_trans_name('ACCDataTrans')
    # The data region spans the whole schedule
    sched, _ = data_trans.apply(sched.children)
    generated = str(psy.gen)
    assert "!$ACC DATA COPYIN(a) COPYOUT(prof,prof%npind)" in generated
def test_data_single_node(parser):
    ''' Check that ACCDataTrans can be applied to a single node (as
    opposed to a list of nodes). '''
    ptree = parser(FortranStringReader(EXPLICIT_DO))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.get('explicit_do').schedule
    data_trans = TransInfo().get_trans_name('ACCDataTrans')
    first_node = sched[0]
    data_trans.apply(first_node)
    # The first entry of the schedule must now be the data directive
    assert isinstance(sched[0], ACCDataDirective)
def test_data_no_gen_code():
    ''' Check that calling gen_code() on the node created by ACCDataTrans
    raises the expected InternalError. '''
    _, invoke_info = get_invoke("explicit_do.f90", api=API, idx=0)
    sched = invoke_info.schedule
    data_trans = TransInfo().get_trans_name('ACCDataTrans')
    sched, _ = data_trans.apply(sched.children[0:2])
    with pytest.raises(InternalError) as err:
        sched.children[0].gen_code(sched)
    expected_msg = ("ACCDataDirective.gen_code should not have "
                    "been called")
    assert expected_msg in str(err.value)
def test_no_default_present(parser):
    ''' Check that passing default_present=False to ACCKernelsTrans gives
    a kernels directive without any clause (as wanted when using managed
    memory). '''
    ptree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = TransInfo().get_trans_name('ACCKernelsTrans')
    _, _ = kernels_trans.apply(sched.children, default_present=False)
    generated = str(psy.gen)
    # The directive must be immediately followed by a newline
    assert "!$ACC KERNELS\n" in generated
def test_add_region_invalid_data_move():
    ''' Check that _add_region() raises the expected InternalError when it
    is given an unsupported value for its data_movement argument. '''
    _, invoke_info = get_invoke("explicit_do.f90", api=API, idx=0)
    sched = invoke_info.schedule
    data_trans = TransInfo().get_trans_name('ACCDataTrans')
    sched, _ = data_trans.apply(sched.children)
    data_directive = sched.children[0]
    with pytest.raises(InternalError) as err:
        data_directive._add_region("DATA", "END DATA",
                                   data_movement="invalid")
    expected_msg = ("optional data_movement argument must be one of "
                    "['present', "
                    "'analyse'] but got 'invalid'")
    assert expected_msg in str(err.value)
def test_kernels_no_gen_code(parser):
    ''' Check that calling gen_code() on the node created by
    ACCKernelsTrans raises the expected InternalError. '''
    code = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(code)
    schedule = psy.invokes.invoke_list[0].schedule
    acc_trans = TransInfo().get_trans_name('ACCKernelsTrans')
    schedule, _ = acc_trans.apply(schedule.children[0:2],
                                  default_present=True)
    with pytest.raises(InternalError) as err:
        schedule.children[0].gen_code(schedule)
    # The message must be read from the exception itself (err.value):
    # str() of the ExceptionInfo wrapper does not contain the message
    # in pytest >= 5.
    assert ("ACCKernelsDirective.gen_code should not have "
            "been called" in str(err.value))
def test_data_view(parser, capsys):
    ''' Check the output of view() for a schedule containing an ACC data
    region, and the dag_name of the node created for that region. '''
    ptree = parser(FortranStringReader(EXPLICIT_DO))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.get('explicit_do').schedule
    data_trans = TransInfo().get_trans_name('ACCDataTrans')
    sched, _ = data_trans.apply(sched.children)
    sched.view()
    # Only what was written to stdout is of interest
    captured_out, _ = capsys.readouterr()
    assert "[ACC DATA]" in captured_out
    data_directive = sched.children[0]
    assert data_directive.dag_name == "ACC_data_1"
def test_explicit_loop_validate():
    ''' Check that NemoExplicitLoopTrans raises a TransformationError
    when it is applied to a loop that is already explicit. '''
    _, invoke_info = parse(os.path.join(BASE_PATH,
                                        "explicit_over_implicit.f90"),
                           api=API, line_length=False)
    psy = PSyFactory(API).create(invoke_info)
    exp_trans = TransInfo().get_trans_name('NemoExplicitLoopTrans')
    sched = psy.invokes.invoke_list[0].schedule
    # Attempt to apply the transformation to an explicit do loop
    with pytest.raises(TransformationError) as err:
        _ = exp_trans.apply(sched.children[0])
    # The message must be read from the exception itself (err.value):
    # str() of the ExceptionInfo wrapper does not contain the message
    # in pytest >= 5.
    assert ("Cannot apply NemoExplicitLoopTrans to something that is "
            "not a NemoImplicitLoop (got " in str(err.value))
def trans(psy):
    '''
    Transformation script entry function. Applies the 'KernelModuleInline'
    transformation to every coded kernel in the 'invoke_0' Schedule.

    :param psy: the PSy object whose 'invoke_0' Schedule is transformed.

    :returns: the PSy object that was passed in.

    '''
    inline_trans = TransInfo().get_trans_name('KernelModuleInline')
    sched = psy.invokes.get('invoke_0').schedule

    # Module-inline each coded kernel in turn
    for kern in sched.coded_kernels():
        inline_trans.apply(kern)

    return psy
def test_seq_loop(parser):
    ''' Check that passing sequential=True to ACCLoopTrans results in a
    SEQ clause on the generated directive. '''
    ptree = parser(FortranStringReader(SINGLE_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    loop_trans = TransInfo().get_trans_name('ACCLoopTrans')
    sched, _ = loop_trans.apply(sched.children[0], sequential=True)
    generated = str(psy.gen)
    expected = (" REAL(KIND = wp) :: sto_tmp(jpj)\n"
                " !$ACC LOOP SEQ\n"
                " DO ji = 1, jpj\n")
    assert expected in generated
def test_collapse_err(parser):
    ''' Check that ACCLoopTrans raises a TransformationError when the
    requested 'collapse' depth is greater than the number of nested
    loops. '''
    ptree = parser(FortranStringReader(DOUBLE_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(ptree)
    sched = psy.invokes.invoke_list[0].schedule
    loop_trans = TransInfo().get_trans_name('ACCLoopTrans')
    with pytest.raises(TransformationError) as err:
        # Ask for more levels than the nest provides
        _, _ = loop_trans.apply(sched.children[0], {"collapse": 3})
    expected_msg = ("Cannot apply COLLAPSE(3) clause to a loop nest "
                    "containing "
                    "only 2 loops")
    assert expected_msg in str(err.value)
def test_no_loops(parser):
    ''' Check that ACCKernelsTrans raises a TransformationError when asked
    to create a kernels region that does not contain any loops. '''
    reader = FortranStringReader("program no_loop\n"
                                 "integer :: jpk\n"
                                 "jpk = 30\n"
                                 "end program no_loop\n")
    code = parser(reader)
    psy = PSyFactory(API, distributed_memory=False).create(code)
    schedule = psy.invokes.invoke_list[0].schedule
    acc_trans = TransInfo().get_trans_name('ACCKernelsTrans')
    with pytest.raises(TransformationError) as err:
        _, _ = acc_trans.apply(schedule.children[0:1],
                               default_present=True)
    # The message must be read from the exception itself (err.value):
    # str() of the ExceptionInfo wrapper does not contain the message
    # in pytest >= 5.
    assert ("must enclose at least one loop but none were found"
            in str(err.value))