def test_psy_init(kernel_outputdir): ''' Check that we create a psy_init() routine that sets-up the OpenCL environment. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) expected = ( " SUBROUTINE psy_init()\n" " USE fortcl, ONLY: ocl_env_init, add_kernels\n" " CHARACTER(LEN=30) kernel_names(1)\n" " LOGICAL, save :: initialised=.False.\n" " ! Check to make sure we only execute this routine once\n" " IF (.not. initialised) THEN\n" " initialised = .True.\n" " ! Initialise the OpenCL environment/device\n" " CALL ocl_env_init(1)\n" " ! The kernels this PSy layer module requires\n" " kernel_names(1) = \"compute_cu_code\"\n" " ! Create the OpenCL kernel objects. Expects to find all of " "the compiled\n" " ! kernels in PSYCLONE_KERNELS_FILE.\n" " CALL add_kernels(1, kernel_names)\n" " END IF\n" " END SUBROUTINE psy_init\n") assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy) # Test with a non-default number of OpenCL queues sched.coded_kernels()[0].set_opencl_options({'queue_number': 5}) generated_code = str(psy.gen) expected = ( " SUBROUTINE psy_init()\n" " USE fortcl, ONLY: ocl_env_init, add_kernels\n" " CHARACTER(LEN=30) kernel_names(1)\n" " LOGICAL, save :: initialised=.False.\n" " ! Check to make sure we only execute this routine once\n" " IF (.not. initialised) THEN\n" " initialised = .True.\n" " ! Initialise the OpenCL environment/device\n" " CALL ocl_env_init(5)\n" " ! The kernels this PSy layer module requires\n" " kernel_names(1) = \"compute_cu_code\"\n" " ! Create the OpenCL kernel objects. Expects to find all of " "the compiled\n" " ! kernels in PSYCLONE_KERNELS_FILE.\n" " CALL add_kernels(1, kernel_names)\n" " END IF\n" " END SUBROUTINE psy_init\n") assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_set_kern_float_arg(kernel_outputdir): ''' Check that we generate correct code to set a real, scalar kernel argument. ''' psy, _ = get_invoke("single_invoke_scalar_float_arg.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) expected = '''\ SUBROUTINE bc_ssh_code_set_args(kernel_obj, a_scalar, ssh_fld, ''' + \ '''xstop, tmask) USE clfortran, ONLY: clSetKernelArg USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t USE ocl_utils_mod, ONLY: check_status INTEGER(KIND=c_intptr_t), intent(in), target :: ssh_fld, tmask INTEGER, intent(in), target :: xstop REAL(KIND=go_wp), intent(in), target :: a_scalar INTEGER ierr INTEGER(KIND=c_intptr_t), target :: kernel_obj ''' assert expected in generated_code expected = '''\ ! Set the arguments for the bc_ssh_code OpenCL Kernel ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(a_scalar), C_LOC(a_scalar)) CALL check_status('clSetKernelArg: arg 0 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(ssh_fld), C_LOC(ssh_fld)) CALL check_status('clSetKernelArg: arg 1 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(xstop), C_LOC(xstop)) CALL check_status('clSetKernelArg: arg 2 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(tmask), C_LOC(tmask)) CALL check_status('clSetKernelArg: arg 3 of bc_ssh_code', ierr) END SUBROUTINE bc_ssh_code_set_args''' assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_set_kern_args(kernel_outputdir): ''' Check that we generate the necessary code to set kernel arguments. ''' psy, _ = get_invoke("single_invoke_two_kernels.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) # Check we've only generated one set-args routine with arguments: # kernel object + kernel arguments + boundary values assert generated_code.count("SUBROUTINE compute_cu_code_set_args(" "kernel_obj, cu_fld, p_fld, u_fld, xstart, " "xstop, ystart, ystop)") == 1 # Declarations expected = '''\ USE clfortran, ONLY: clSetKernelArg USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t USE ocl_utils_mod, ONLY: check_status INTEGER(KIND=c_intptr_t), intent(in), target :: cu_fld, p_fld, u_fld INTEGER, intent(in), target :: xstart, xstop, ystart, ystop INTEGER ierr INTEGER(KIND=c_intptr_t), target :: kernel_obj''' assert expected in generated_code expected = '''\ ! Set the arguments for the compute_cu_code OpenCL Kernel ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(cu_fld), C_LOC(cu_fld)) CALL check_status('clSetKernelArg: arg 0 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(p_fld), C_LOC(p_fld)) CALL check_status('clSetKernelArg: arg 1 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(u_fld), C_LOC(u_fld)) CALL check_status('clSetKernelArg: arg 2 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(xstart), C_LOC(xstart)) CALL check_status('clSetKernelArg: arg 3 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 4, C_SIZEOF(xstop), C_LOC(xstop)) CALL check_status('clSetKernelArg: arg 4 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 5, C_SIZEOF(ystart), C_LOC(ystart)) CALL check_status('clSetKernelArg: arg 5 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 6, C_SIZEOF(ystop), C_LOC(ystop)) CALL check_status('clSetKernelArg: arg 6 of compute_cu_code', ierr) END SUBROUTINE compute_cu_code_set_args''' assert expected in generated_code # The call to the set_args matches the expected kernel signature with # the boundary values converted to 0-indexing assert ("CALL compute_cu_code_set_args(kernel_compute_cu_code, " "cu_fld%device_ptr, p_fld%device_ptr, u_fld%device_ptr, " "xstart - 1, xstop - 1, " "ystart - 1, ystop - 1)" in generated_code) # There is also only one version of the set_args for the second kernel assert generated_code.count("SUBROUTINE time_smooth_code_set_args(" "kernel_obj, u_fld, unew_fld, uold_fld, " "xstart_1, xstop_1, ystart_1, ystop_1)") == 1 assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_use_stmts(kernel_outputdir): ''' Test that generating code for OpenCL results in the correct module use statements. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen).lower() expected = '''\ subroutine invoke_0_compute_cu(cu_fld, p_fld, u_fld) use fortcl, only: create_rw_buffer use fortcl, only: get_num_cmd_queues, get_cmd_queues, get_kernel_by_name use clfortran use iso_c_binding''' assert expected in generated_code assert "if (first_time) then" in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_invoke_use_stmts_and_decls(kernel_outputdir, monkeypatch, debug_mode): ''' Test that generating code for OpenCL results in the correct module use statements and declarations. ''' api_config = Config.get().api_conf("gocean1.0") monkeypatch.setattr(api_config, "_debug_mode", debug_mode) psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen).lower() expected = '''\ subroutine invoke_0_compute_cu(cu_fld, p_fld, u_fld) use fortcl, only: create_rw_buffer\n''' # When in debug mode, import also the check_status function if debug_mode: expected += " use ocl_utils_mod, only: check_status\n" expected += '''\ use fortcl, only: get_num_cmd_queues, get_cmd_queues, get_kernel_by_name use clfortran use iso_c_binding type(r2d_field), intent(inout), target :: cu_fld, p_fld, u_fld integer xstart, xstop, ystart, ystop integer(kind=c_size_t), target :: localsize(2) integer(kind=c_size_t), target :: globalsize(2) integer(kind=c_intptr_t), target :: write_event integer(kind=c_size_t) size_in_bytes integer(kind=c_intptr_t), target, save :: kernel_compute_cu_code logical, save :: first_time=.true. integer ierr integer(kind=c_intptr_t), pointer, save :: cmd_queues(:) integer, save :: num_cmd_queues ''' assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_invoke_opencl_initialisation(kernel_outputdir): ''' Test that generating code for OpenCL results in the correct OpenCL first time initialisation code ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen).lower() # Test that the necessary variables are declared at the beginning # of the invoke expected = '''\ integer(kind=c_size_t), target :: localsize(2) integer(kind=c_size_t), target :: globalsize(2) integer(kind=c_intptr_t), target :: write_event integer(kind=c_size_t) size_in_bytes integer(kind=c_intptr_t), target, save :: kernel_compute_cu_code logical, save :: first_time=.true. integer ierr integer(kind=c_intptr_t), pointer, save :: cmd_queues(:) integer, save :: num_cmd_queues''' assert expected in generated_code # Test that a conditional 'first_time' code is generated with the # expected initialisation statements expected = '''\ if (first_time) then first_time = .false. ! ensure opencl run-time is initialised for this psy-layer module call psy_init num_cmd_queues = get_num_cmd_queues() cmd_queues => get_cmd_queues() kernel_compute_cu_code = get_kernel_by_name("compute_cu_code") end if''' assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_opencl_compiler_works(kernel_outputdir): ''' Check that the specified compiler works for a hello-world opencl example. This is done in this file to alert the user that all compiles tests are skipped if only the '--compile' command line option is used (instead of --compileopencl) ''' Compile.skip_if_opencl_compilation_disabled() example_ocl_code = ''' program hello USE fortcl write (*,*) "Hello" end program hello ''' old_pwd = kernel_outputdir.chdir() try: with open("hello_world_opencl.f90", "w") as ffile: ffile.write(example_ocl_code) GOcean1p0OpenCLBuild(kernel_outputdir).\ compile_file("hello_world_opencl.f90", link=True) finally: old_pwd.chdir()
def test_psy_init_with_options(kernel_outputdir): ''' Check that we create a psy_init() routine that sets-up the OpenCL environment with the provided non-default options. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched, options={ "end_barrier": True, "enable_profiling": True, "out_of_order": True }) generated_code = str(psy.gen) assert "CALL ocl_env_init(1, ocl_device_num, .True., .True.)\n" \ in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_set_kern_args(kernel_outputdir): ''' Check that we generate the necessary code to set kernel arguments. ''' psy, _ = get_invoke("single_invoke_two_kernels.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) # Check we've only generated one set-args routine assert generated_code.count("SUBROUTINE compute_cu_code_set_args(" "kernel_obj, cu_fld, p_fld, u_fld)") == 1 # Declarations expected = '''\ SUBROUTINE compute_cu_code_set_args(kernel_obj, cu_fld, p_fld, u_fld) USE clfortran, ONLY: clSetKernelArg USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t USE ocl_utils_mod, ONLY: check_status INTEGER(KIND=c_intptr_t), intent(in), target :: cu_fld, p_fld, u_fld INTEGER ierr INTEGER(KIND=c_intptr_t), target :: kernel_obj''' assert expected in generated_code expected = '''\ ! Set the arguments for the compute_cu_code OpenCL Kernel ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(cu_fld), C_LOC(cu_fld)) CALL check_status('clSetKernelArg: arg 0 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(p_fld), C_LOC(p_fld)) CALL check_status('clSetKernelArg: arg 1 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(u_fld), C_LOC(u_fld)) CALL check_status('clSetKernelArg: arg 2 of compute_cu_code', ierr) END SUBROUTINE compute_cu_code_set_args''' assert expected in generated_code assert generated_code.count("SUBROUTINE time_smooth_code_set_args(" "kernel_obj, u_fld, " "unew_fld, uold_fld)") == 1 assert ("CALL compute_cu_code_set_args(kernel_compute_cu_code, " "cu_fld%device_ptr, p_fld%device_ptr, " "u_fld%device_ptr)" in generated_code) assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_psy_init(kernel_outputdir, monkeypatch): ''' Check that we create a psy_init() routine that sets-up the OpenCL environment. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) expected = ( " SUBROUTINE psy_init()\n" " USE fortcl, ONLY: ocl_env_init, add_kernels\n" " CHARACTER(LEN=30) kernel_names(1)\n" " INTEGER :: ocl_device_num=1\n" " LOGICAL, save :: initialised=.False.\n" " ! Check to make sure we only execute this routine once\n" " IF (.not. initialised) THEN\n" " initialised = .True.\n" " ! Initialise the OpenCL environment/device\n" " CALL ocl_env_init(1, ocl_device_num, .False., .False.)\n" " ! The kernels this PSy layer module requires\n" " kernel_names(1) = \"compute_cu_code\"\n" " ! Create the OpenCL kernel objects. Expects to find all of " "the compiled\n" " ! kernels in FORTCL_KERNELS_FILE.\n" " CALL add_kernels(1, kernel_names)\n" " END IF\n" " END SUBROUTINE psy_init\n") assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy) # Test with a non-default number of OpenCL queues sched.coded_kernels()[0].set_opencl_options({'queue_number': 5}) generated_code = str(psy.gen) expected = ( " SUBROUTINE psy_init()\n" " USE fortcl, ONLY: ocl_env_init, add_kernels\n" " CHARACTER(LEN=30) kernel_names(1)\n" " INTEGER :: ocl_device_num=1\n" " LOGICAL, save :: initialised=.False.\n" " ! Check to make sure we only execute this routine once\n" " IF (.not. initialised) THEN\n" " initialised = .True.\n" " ! Initialise the OpenCL environment/device\n" " CALL ocl_env_init(5, ocl_device_num, .False., .False.)\n" " ! The kernels this PSy layer module requires\n" " kernel_names(1) = \"compute_cu_code\"\n" " ! Create the OpenCL kernel objects. Expects to find all of " "the compiled\n" " ! kernels in FORTCL_KERNELS_FILE.\n" " CALL add_kernels(1, kernel_names)\n" " END IF\n" " END SUBROUTINE psy_init\n") assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy) # Test with a different configuration value for OCL_DEVICES_PER_NODE # that needs a mod() and a get_rank() expression. monkeypatch.setattr(Config.get(), "_ocl_devices_per_node", 2) generated_code = str(psy.gen) expected = ( " SUBROUTINE psy_init()\n" " USE parallel_mod, ONLY: get_rank\n" " USE fortcl, ONLY: ocl_env_init, add_kernels\n" " CHARACTER(LEN=30) kernel_names(1)\n" " INTEGER :: ocl_device_num=1\n" " LOGICAL, save :: initialised=.False.\n" " ! Check to make sure we only execute this routine once\n" " IF (.not. initialised) THEN\n" " initialised = .True.\n" " ! Initialise the OpenCL environment/device\n" " ocl_device_num = mod(get_rank() - 1, 2) + 1\n" " CALL ocl_env_init(5, ocl_device_num, .False., .False.)\n" " ! The kernels this PSy layer module requires\n" " kernel_names(1) = \"compute_cu_code\"\n" " ! Create the OpenCL kernel objects. Expects to find all of " "the compiled\n" " ! kernels in FORTCL_KERNELS_FILE.\n" " CALL add_kernels(1, kernel_names)\n" " END IF\n" " END SUBROUTINE psy_init\n") assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_invoke_opencl_kernel_call(kernel_outputdir, monkeypatch, debug_mode): ''' Check that the Invoke OpenCL produce the expected kernel enqueue statement to launch OpenCL kernels. ''' api_config = Config.get().api_conf("gocean1.0") monkeypatch.setattr(api_config, "_debug_mode", debug_mode) psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) # Set up globalsize and localsize values expected = '''\ globalsize = (/p_fld%grid%nx, p_fld%grid%ny/) localsize = (/64, 1/)''' if debug_mode: # Check that the globalsize first dimension is a multiple of # the localsize first dimension expected += ''' IF (mod(p_fld%grid%nx, 64) .ne. 0) THEN CALL check_status("Global size is not a multiple of local size \ (mandatory in OpenCL < 2.0).", -1) END IF''' assert expected in generated_code # Call the set_args subroutine with the boundaries corrected for the # OpenCL 0-indexing expected += ''' CALL compute_cu_code_set_args(kernel_compute_cu_code, \ cu_fld%device_ptr, p_fld%device_ptr, u_fld%device_ptr, \ xstart - 1, xstop - 1, \ ystart - 1, ystop - 1)''' expected += ''' ! Launch the kernel''' if debug_mode: # Check that there is no pending error in the queue before launching # the kernel expected += ''' ierr = clFinish(cmd_queues(1)) CALL check_status('Errors before compute_cu_code launch', ierr)''' expected += ''' ierr = clEnqueueNDRangeKernel(cmd_queues(1), kernel_compute_cu_code, \ 2, C_NULL_PTR, C_LOC(globalsize), C_LOC(localsize), 0, C_NULL_PTR, \ C_NULL_PTR) !''' if debug_mode: # Check that there are no errors during the kernel launch or during # the execution of the kernel. expected += ''' CALL check_status('compute_cu_code clEnqueueNDRangeKernel', ierr) ierr = clFinish(cmd_queues(1)) CALL check_status('Errors during compute_cu_code', ierr)''' assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)