def test_profile_nemo_no_acc_kernels(parser):
    '''
    Verify that automatic kernel-level profiling inserts no profiling
    calls when two kernels are enclosed in a single OpenACC kernels
    region. Nothing may be added there because the PSyData routines would
    have to have been compiled for execution on the GPU.

    '''
    fortran_source = (
        "program do_loop\n"
        "integer, parameter :: jpi=5, jpj=5\n"
        "integer :: ji, jj\n"
        "real :: sto_tmp(jpi,jpj)\n"
        "do jj = 1, jpj\n"
        " do ji = 1,jpi\n"
        " sto_tmp(ji,jj) = 1.0d0\n"
        " end do\n"
        "end do\n"
        "do ji = 1, jpi\n"
        " sto_tmp(ji,1) = 0.0d0\n"
        "end do\n"
        "end program do_loop\n")
    kernels_trans = ACCKernelsTrans()
    # Request automatic profiling at the kernel level.
    Profiler.set_options([Profiler.KERNELS])
    psy, sched = get_nemo_schedule(parser, fortran_source)
    # Put every top-level node of the schedule inside one kernels region
    # before asking the profiler to instrument the schedule.
    kernels_trans.apply(sched.children)
    Profiler.add_profile_nodes(sched, Loop)
    generated = str(psy.gen).lower()
    assert "profile_psy" not in generated
def trans(psy):
    '''PSyclone transformation script for the dynamo0p3 api that wraps
    each loop of every invoke in its own OpenACC Kernels region. A textual
    representation of each transformed schedule is also output.

    :param psy: a PSyclone PSy object which captures the algorithm and
                kernel information required by PSyclone.
    :type psy: subclass of :py:class:`psyclone.psyGen.PSy`

    :returns: the PSy object that was passed in.

    '''
    acc_kernels = ACCKernelsTrans()

    for current_invoke in psy.invokes.invoke_list:
        sched = current_invoke.schedule

        # Each loop reported by the schedule gets a kernels region of
        # its own (passed as a single-element list).
        for target_loop in sched.loops():
            acc_kernels.apply([target_loop])

        # Display the schedule once all of its loops have been handled.
        sched.view()

    return psy
def test_kernels_within_if(parser):
    '''
    Verify that separate kernels regions can be created inside the two
    branches of an if block.

    '''
    fortran_source = (
        "program if_then\n"
        "logical :: do_this\n"
        "integer :: ji, jpi\n"
        "real :: fld(:), fld2d(:,:)\n"
        "if(do_this)then\n"
        " do ji=1,jpi\n"
        " fld(ji) = 1.0\n"
        " end do\n"
        "else\n"
        " fld2d(:,:) = 0.0\n"
        "end if\n"
        "end program if_then\n")
    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    schedule = psy.invokes.invoke_list[0].schedule
    ktrans = ACCKernelsTrans()
    # The schedule returned by each apply() is the one used from then on.
    schedule, _ = ktrans.apply(schedule.children[0].if_body,
                               {"default_present": True})
    schedule, _ = ktrans.apply(schedule.children[0].else_body,
                               {"default_present": True})
    generated = str(psy.gen)

    if_branch = (" IF (do_this) THEN\n"
                 " !$ACC KERNELS DEFAULT(PRESENT)\n"
                 " DO ji = 1, jpi\n")
    assert if_branch in generated

    else_branch = (" END DO\n"
                   " !$ACC END KERNELS\n"
                   " ELSE\n"
                   " !$ACC KERNELS DEFAULT(PRESENT)\n"
                   " fld2d(:, :) = 0.0\n"
                   " !$ACC END KERNELS\n"
                   " END IF\n")
    assert else_branch in generated
def test_nemo_acc_kernels(default_present, expected, parser):
    '''
    Verify how an OpenACC kernels directive in the NEMO API is treated by
    the Fortran and C back-ends: the Fortran writer must produce the
    directive (with the clause text given by 'expected') while the C
    writer must reject the node.

    '''
    # Build the PSy layer from the fparser2 parse tree of the test code.
    parse_tree = parser(FortranStringReader(NEMO_TEST_CODE))
    psy = PSyFactory("nemo", distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule

    # Enclose the first node of the schedule in a kernels region.
    kernels_trans = ACCKernelsTrans()
    kernels_trans.apply(sched[0], {"default_present": default_present})

    fortran_writer = FortranWriter()
    fortran_output = fortran_writer(sched)
    # NOTE(review): the expected text below is reproduced verbatim;
    # confirm that its whitespace matches what FortranWriter emits.
    template = '''!$acc kernels{0} do i = 1, 20, 2 a = 2 * i + d(i) c(i) = a b(i) = b(i) + a + c(i) enddo !$acc end kernels'''
    assert template.format(expected) in fortran_output

    c_writer = CWriter()
    with pytest.raises(VisitorError) as excinfo:
        _ = c_writer(sched[0])
    assert ("Unsupported node 'ACCKernelsDirective' found"
            in str(excinfo.value))
def test_two_loops_inside_kernels(parser):
    '''
    Verify that, for a KERNELS region holding two loops, a loop directive
    can be added to the first loop alone and then to the second as well.

    '''
    fortran_source = (
        "program two_loops\n"
        " integer :: ji\n"
        " real :: array(10)\n"
        " do ji = 1, 10\n"
        " array(ji) = 1.0\n"
        " end do\n"
        " do ji = 1, 5\n"
        " array(ji) = 2.0*array(ji)\n"
        " end do\n"
        "end program two_loops\n")
    # Both stages of the test expect the same text at the end of the
    # kernels region.
    region_end = (" end do\n"
                  " !$acc end kernels\n"
                  "end program")

    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    schedule = psy.invokes.invoke_list[0].schedule

    # A single KERNELS region around the pair of loops.
    kernels_trans = ACCKernelsTrans()
    kernels_trans.apply(schedule[0:2])

    # Stage one: only the first loop is given a loop directive.
    acc_loop = ACCLoopTrans()
    acc_loop.apply(schedule[0].dir_body[0])
    generated = str(psy.gen).lower()
    first_loop = (" !$acc kernels\n"
                  " !$acc loop independent\n"
                  " do ji = 1, 10\n")
    assert first_loop in generated
    assert region_end in generated

    # Stage two: the second loop is given a loop directive too.
    acc_loop.apply(schedule[0].dir_body[1])
    generated = str(psy.gen).lower()
    second_loop = (" !$acc loop independent\n"
                   " do ji = 1, 5\n")
    assert second_loop in generated
    assert region_end in generated
def test_loop_after_implicit_kernels(parser):
    '''
    Verify that a loop directive can be added to an explicit loop that
    follows an implicit loop (array assignment) within a kernels region.

    '''
    fortran_source = (
        "program two_loops\n"
        " integer :: ji\n"
        " real :: array(10,10)\n"
        " array(:,:) = -1.0\n"
        " do ji = 1, 5\n"
        " array(ji,1) = 2.0*array(ji,2)\n"
        " end do\n"
        "end program two_loops\n")
    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    schedule = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    acc_loop = ACCLoopTrans()

    # Enclose both statements, then mark up the second entry of the
    # region body (the explicit loop).
    kernels_trans.apply(schedule[0:2])
    acc_loop.apply(schedule[0].dir_body[1])
    generated = str(psy.gen).lower()

    region_start = (" !$acc kernels\n"
                    " array(:, :) = - 1.0\n"
                    " !$acc loop independent\n"
                    " do ji = 1, 5\n")
    assert region_start in generated

    region_end = (" end do\n"
                  " !$acc end kernels\n"
                  "end program")
    assert region_end in generated
def test_multikern_if(parser):
    '''
    Verify that an if-block whose branches each contain a loop can be
    enclosed as a whole within a kernels region.

    '''
    fortran_source = (
        "program implicit_loop\n"
        "logical :: do_this\n"
        "integer :: jk\n"
        "real(kind=wp) :: sto_tmp(5)\n"
        "if(do_this)then\n"
        "do jk = 1, 3\n"
        " sto_tmp(jk) = jk\n"
        "end do\n"
        "else\n"
        "do jk = 1, 5\n"
        " sto_tmp(jk) = jk\n"
        "end do\n"
        "end if\n"
        "end program implicit_loop\n")
    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    schedule = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    # The region is built from the first child of the schedule only.
    kernels_trans.apply(schedule.children[0:1], {"default_present": True})
    generated = str(psy.gen).lower()

    region_start = ("!$acc kernels default(present)\n"
                    " if (do_this) then\n"
                    " do jk = 1, 3\n")
    assert region_start in generated

    region_end = (" end do\n"
                  " end if\n"
                  " !$acc end kernels\n"
                  "end program implicit_loop")
    assert region_end in generated
def test_kernels_dag_name(parser):
    '''
    Verify the DAG-node name reported for an OpenACC kernels directive.

    '''
    parse_tree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    kernels_trans.apply(sched.children, {"default_present": True})
    # After the transformation the first child of the schedule is the
    # node whose name is checked.
    directive = sched.children[0]
    assert directive.dag_name == "ACC_kernels_1"
def test_kernels_single_node(parser):
    '''
    Verify that ACCKernelsTrans accepts a single node as its target, not
    just a list of nodes.

    '''
    parse_tree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    # Deliberately pass the node itself rather than wrapping it in a list.
    kernels_trans.apply(sched[0], {"default_present": True})
    assert isinstance(sched[0], ACCKernelsDirective)
def test_kernels_view(parser, capsys):
    '''
    Verify that viewing a schedule containing an ACCKernelsDirective
    prints the expected label for that directive.

    '''
    parse_tree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    kernels_trans.apply(sched.children, {"default_present": True})
    sched.view()
    # Only the captured standard output is of interest.
    stdout, _ = capsys.readouterr()
    assert "[ACC Kernels]" in stdout
def test_loop_inside_kernels(parser):
    '''
    Verify that an ACC LOOP directive can be placed on a loop that is
    inside a KERNELS region.

    '''
    parse_tree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule

    # First create the region around the leading node of the schedule...
    kernels_trans = ACCKernelsTrans()
    kernels_trans.apply([sched[0]])
    # ...then decorate the first entry of the region body.
    acc_loop = ACCLoopTrans()
    acc_loop.apply(sched[0].dir_body[0])
    generated = str(psy.gen).lower()

    region_start = (" !$acc kernels\n"
                    " !$acc loop independent\n"
                    " do ji = 1, jpj\n")
    assert region_start in generated

    region_end = (" end do\n"
                  " !$acc end kernels\n")
    assert region_end in generated
def test_implicit_loop(parser):
    '''
    Verify the code generated when the kernels transformation is applied
    to an implicit loop (a whole-array assignment).

    '''
    fortran_source = (
        "program implicit_loop\n"
        "real(kind=wp) :: sto_tmp(5,5)\n"
        "sto_tmp(:,:) = 0.0_wp\n"
        "end program implicit_loop\n")
    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    kernels_trans.apply(sched.children[0:1], {"default_present": True})
    generated = str(psy.gen)

    expected_region = (" !$ACC KERNELS DEFAULT(PRESENT)\n"
                       " sto_tmp(:, :) = 0.0_wp\n"
                       " !$ACC END KERNELS\n")
    assert expected_region in generated
def trans(psy):
    '''PSyclone transformation script for the dynamo0p3 api which, for
    every invoke, colours loops where required, adds OpenACC Kernels and
    Loop directives to the non-'colours' loops, adds a Routine directive
    to each coded kernel and finally adds an Enter Data directive for the
    schedule. Each transformed schedule is also displayed.

    :param psy: a PSyclone PSy object which captures the algorithm and
                kernel information required by PSyclone.
    :type psy: subclass of :py:class:`psyclone.psyGen.PSy`

    :returns: the PSy object that was passed in.

    '''
    kernels_trans = ACCKernelsTrans()
    routine_trans = ACCRoutineTrans()
    colour_trans = Dynamo0p3ColourTrans()
    acc_loop_trans = ACCLoopTrans()
    enter_data_trans = ACCEnterDataTrans()
    constants = LFRicConstants()

    for current_invoke in psy.invokes.invoke_list:
        sched = current_invoke.schedule

        # Colouring: only for loops over "cell_column" whose field space
        # is not one of the discontinuous ones.
        for candidate in sched.loops():
            if (candidate.field_space.orig_name
                    in constants.VALID_DISCONTINUOUS_NAMES):
                continue
            if candidate.iteration_space != "cell_column":
                continue
            colour_trans.apply(candidate)

        # Kernels and Loop directives go on every loop except those of
        # type "colours".
        for candidate in sched.loops():
            if candidate.loop_type == "colours":
                continue
            kernels_trans.apply([candidate])
            acc_loop_trans.apply(candidate)

        # Routine directive for each coded kernel.
        for coded_kernel in sched.coded_kernels():
            routine_trans.apply(coded_kernel)

        # Enter Data directive covering all of the PSy layer.
        enter_data_trans.apply(sched)

        sched.view()

    return psy
def test_kernels_around_where_construct(parser):
    '''
    Verify that a WHERE construct can be enclosed within a KERNELS region.

    '''
    fortran_source = (
        "program where_test\n"
        " integer :: flag\n"
        " real :: a(:,:), b(:,:)\n"
        " where (a(:,:) < flag)\n"
        " b(:,:) = 0.0\n"
        " end where\n"
        "end program where_test\n")
    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    # Here the schedule object itself is the target of the transformation.
    kernels_trans.apply(sched)

    # The directive must now lead the schedule and hold a Loop node.
    assert isinstance(sched[0], ACCKernelsDirective)
    assert isinstance(sched[0].dir_body[0], Loop)

    generated = str(psy.gen)
    region_start = (" !$ACC KERNELS\n"
                    " WHERE (a(:, :) < flag)")
    assert region_start in generated
    region_end = (" END WHERE\n"
                  " !$ACC END KERNELS\n")
    assert region_end in generated
def test_no_default_present(parser):
    '''
    Verify that a kernels region can be generated without a
    'default(present)' clause (as will be wanted when using managed
    memory).

    '''
    parse_tree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    # Explicitly switch the clause off; both returned values are unused.
    _, _ = kernels_trans.apply(sched.children, {"default_present": False})
    generated = str(psy.gen)
    # The directive must be followed directly by a newline, i.e. it
    # carries no clause.
    assert "!$ACC KERNELS\n" in generated
def test_kernels_around_where_stmt(parser):
    '''
    Verify that a single-line WHERE statement can be enclosed within a
    KERNELS region while the statements either side of it are left
    outside.

    '''
    fortran_source = (
        "program where_test\n"
        " integer :: flag\n"
        " real :: a(:,:), b(:,:), c(:,:)\n"
        " a(:,:) = 1.0\n"
        " where (a(:,:) < flag) b(:,:) = 0.0\n"
        " c(:,:) = 1.0\n"
        "end program where_test\n")
    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()
    # Only the second node of the schedule is put in the region.
    kernels_trans.apply([sched[1]])
    generated = str(psy.gen)

    expected_code = (" a(:, :) = 1.0\n"
                     " !$ACC KERNELS\n"
                     " WHERE (a(:, :) < flag) b(:, :) = 0.0\n"
                     " !$ACC END KERNELS\n"
                     " c(:, :) = 1.0\n")
    assert expected_code in generated
def trans(psy):
    '''PSyclone transformation script for the dynamo0p3 api that applies
    an OpenACC Kernels transformation to the whole schedule of every
    invoke, reporting each invoke as it is processed and displaying the
    resulting schedule.

    :param psy: the PSy object whose invokes are to be transformed.

    :returns: the PSy object that was passed in.

    '''
    acc_kernels = ACCKernelsTrans()

    for current_invoke in psy.invokes.invoke_list:
        progress_msg = "Transforming invoke '" + current_invoke.name + "'..."
        print(progress_msg)

        sched = current_invoke.schedule
        # Both values returned by the transformation are discarded.
        _, _ = acc_kernels.apply(sched)
        sched.view()

    return psy
def test_no_psydata_in_kernels(parser, monkeypatch):
    '''
    Verify that PSyData calls inside a kernels region are refused, first
    by the profiling transformation and then, if its validation is
    bypassed, at code-generation time.

    '''
    parse_tree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    profile_trans = ProfileTrans()
    kernels_trans = ACCKernelsTrans()
    kernels_trans.apply(sched[0])

    # The target for the profiling call is the first assignment found in
    # the schedule.
    target = sched.walk(Assignment)[0]
    with pytest.raises(TransformationError) as excinfo:
        profile_trans.apply(target)
    assert ("A PSyData node cannot be inserted inside an OpenACC region"
            in str(excinfo.value))

    # Replace validate() with a no-op so that apply() no longer performs
    # the check that raised the error above.
    def _skip_validation(x, y):
        return None

    monkeypatch.setattr(profile_trans, "validate", _skip_validation)
    profile_trans.apply(target)

    # The problem must now be reported when the code is generated.
    with pytest.raises(GenerationError) as excinfo:
        _ = psy.gen
    assert ("Cannot include calls to PSyData routines within OpenACC "
            "regions" in str(excinfo.value))
def test_no_loops(parser):
    '''
    Verify that the transformation raises an error when asked to create a
    kernels region around code that contains no loops.

    '''
    fortran_source = (
        "program no_loop\n"
        "integer :: jpk\n"
        "jpk = 30\n"
        "end program no_loop\n")
    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()

    with pytest.raises(TransformationError) as excinfo:
        _, _ = kernels_trans.apply(sched.children[0:1],
                                   {"default_present": True})
    message = str(excinfo.value)
    assert ("must enclose at least one loop or array range but none were "
            "found" in message)
def test_no_code_block_kernels(parser):
    '''
    Verify that an attempt to enclose a CodeBlock within a Kernels region
    is rejected.

    '''
    fortran_source = (
        "program cb_mix\n"
        " integer :: ji, jpi\n"
        " real :: fld(:)\n"
        " do ji=1,jpi\n"
        " fld(ji) = 1.0\n"
        " end do\n"
        " write(*,*) 'Hello'\n"
        "end program cb_mix\n")
    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()

    # All children of the schedule are offered to the transformation.
    with pytest.raises(TransformationError) as excinfo:
        _, _ = kernels_trans.apply(sched.children)
    message = str(excinfo.value)
    assert "'CodeBlock' cannot be enclosed by a ACCKernelsTrans " in message
def test_no_kernels_error(parser):
    '''
    Verify that the transformation rejects an attempt to enclose things
    that are not kernels (here, loops containing read and write
    statements) within a kernels region.

    '''
    fortran_source = (
        "program write_out\n"
        "integer :: ji, jpj\n"
        "real(kind=wp) :: sto_tmp(5)\n"
        "do ji = 1,jpj\n"
        "read(*,*) sto_tmp(ji)\n"
        "end do\n"
        "do ji = 1,jpj\n"
        "write(*,*) sto_tmp(ji)\n"
        "end do\n"
        "end program write_out\n")
    parse_tree = parser(FortranStringReader(fortran_source))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    sched = psy.invokes.invoke_list[0].schedule
    kernels_trans = ACCKernelsTrans()

    with pytest.raises(TransformationError) as excinfo:
        _, _ = kernels_trans.apply(sched.children[0:2],
                                   {"default_present": True})
    message = str(excinfo.value)
    assert ("cannot be enclosed by a ACCKernelsTrans transformation"
            in message)