def test_opencl_kernel_missing_boundary_symbol(): '''Check that an OpenCL file named modulename_kernelname_0 is generated. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Create dummy boundary symbols for the "name" kernel with one missing # symbol sched.symbol_table.new_symbol("a", tag="xstart_name", symbol_type=DataSymbol, datatype=INTEGER_TYPE) sched.symbol_table.new_symbol("c", tag="ystart_name", symbol_type=DataSymbol, datatype=INTEGER_TYPE) sched.symbol_table.new_symbol("d", tag="ystop_name", symbol_type=DataSymbol, datatype=INTEGER_TYPE) otrans = OCLTrans() otrans.apply(sched) sched.kernels()[0].name = "name" with pytest.raises(GenerationError) as err: _ = psy.gen # Generates the OpenCL kernels as a side-effect. assert ("Boundary symbol tag 'xstop_name' not found while generating the " "OpenCL code for kernel 'name'. Make sure to apply the " "GOMoveIterationBoundariesInsideKernelTrans before attempting the" " OpenCL code generation." in str(err.value))
def trans(psy): ''' Transformation routine for use with PSyclone. Applies the OpenCL transform to the first Invoke in the psy object. :param psy: the PSy object which this script will transform. :type psy: :py:class:`psyclone.psyGen.PSy` :returns: the transformed PSy object. :rtype: :py:class:`psyclone.psyGen.PSy` ''' # Get the Schedule associated with the first Invoke invoke = psy.invokes.invoke_list[0] sched = invoke.schedule # Convert any kernel accesses to global data into arguments ktrans = KernelGlobalsToArguments() for kern in sched.kernels(): ktrans.apply(kern) # Transform the Schedule cltrans = OCLTrans() cltrans.apply(sched, options={"end_barrier": True}) # Provide kernel-specific OpenCL optimization options move_boundaries_trans = GOMoveIterationBoundariesInsideKernelTrans() for kern in sched.kernels(): # Move the PSy-layer loop boundaries inside the kernel as a kernel # mask, this allows to iterate through the whole domain move_boundaries_trans.apply(kern) # Specify the OpenCL queue and workgroup size of the kernel kern.set_opencl_options({"queue_number": 1, 'local_size': 4}) return psy
def trans(psy): ''' Transformation routine for use with PSyclone. Applies the OpenCL transform to the first Invoke in the psy object. :param psy: the PSy object which this script will transform. :type psy: :py:class:`psyclone.psyGen.PSy` :returns: the transformed PSy object. :rtype: :py:class:`psyclone.psyGen.PSy` ''' from psyclone.transformations import OCLTrans, KernelGlobalsToArguments # Get the Schedule associated with the first Invoke invoke = psy.invokes.invoke_list[0] sched = invoke.schedule # Convert any kernel accesses to global data into arguments ktrans = KernelGlobalsToArguments() for kern in sched.kernels(): ktrans.apply(kern) # Transform the Schedule cltrans = OCLTrans() cltrans.apply(sched, options={"end_barrier": True}) # Provide kernel-specific OpenCL optimization options for kern in sched.kernels(): kern.set_opencl_options({"queue_number": 1, 'local_size': 4}) return psy
def test_set_kern_args_real_grid_property(): ''' Check that we generate correct code to set a real scalar grid property. ''' psy, _ = get_invoke("driver_test.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) expected = '''\ SUBROUTINE compute_kernel_code_set_args(kernel_obj, out_fld, in_out_fld, \ in_fld, dx, dx_1, gphiu, xstart, xstop, ystart, ystop) USE clfortran, ONLY: clSetKernelArg USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t USE ocl_utils_mod, ONLY: check_status INTEGER(KIND=c_intptr_t), intent(in), target :: out_fld, in_out_fld, \ in_fld, dx, gphiu REAL(KIND=go_wp), intent(in), target :: dx_1 INTEGER, intent(in), target :: xstart, xstop, ystart, ystop''' assert expected in generated_code
def trans(psy): ''' Transformation routine for use with PSyclone. Applies the OpenCL transform to the first Invoke in the psy object. :param psy: the PSy object which this script will transform. :type psy: :py:class:`psyclone.psyGen.PSy` :returns: the transformed PSy object. :rtype: :py:class:`psyclone.psyGen.PSy` ''' ocl_trans = OCLTrans() fold_trans = FoldConditionalReturnExpressionsTrans() move_boundaries_trans = GOMoveIterationBoundariesInsideKernelTrans() # Get the Schedule associated with the first Invoke invoke = psy.invokes.invoke_list[0] sched = invoke.schedule # Transform the Schedule ocl_trans.apply(sched, options={"end_barrier": True}) # Provide kernel-specific OpenCL optimization options for kern in sched.kernels(): # Move the PSy-layer loop boundaries inside the kernel as a kernel # mask, this allows to iterate through the whole domain move_boundaries_trans.apply(kern) # Change the syntax to remove the return statements introduced by the # previous transformation fold_trans.apply(kern.get_kernel_schedule()) # Specify the OpenCL queue and workgroup size of the kernel kern.set_opencl_options({"queue_number": 1, 'local_size': 4}) return psy
def test_set_kern_float_arg(kernel_outputdir): ''' Check that we generate correct code to set a real, scalar kernel argument. ''' psy, _ = get_invoke("single_invoke_scalar_float_arg.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) expected = '''\ SUBROUTINE bc_ssh_code_set_args(kernel_obj, a_scalar, ssh_fld, ''' + \ '''xstop, tmask) USE clfortran, ONLY: clSetKernelArg USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t USE ocl_utils_mod, ONLY: check_status INTEGER(KIND=c_intptr_t), intent(in), target :: ssh_fld, tmask INTEGER, intent(in), target :: xstop REAL(KIND=go_wp), intent(in), target :: a_scalar INTEGER ierr INTEGER(KIND=c_intptr_t), target :: kernel_obj ''' assert expected in generated_code expected = '''\ ! Set the arguments for the bc_ssh_code OpenCL Kernel ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(a_scalar), C_LOC(a_scalar)) CALL check_status('clSetKernelArg: arg 0 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(ssh_fld), C_LOC(ssh_fld)) CALL check_status('clSetKernelArg: arg 1 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(xstop), C_LOC(xstop)) CALL check_status('clSetKernelArg: arg 2 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(tmask), C_LOC(tmask)) CALL check_status('clSetKernelArg: arg 3 of bc_ssh_code', ierr) END SUBROUTINE bc_ssh_code_set_args''' assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_psy_init(kernel_outputdir): ''' Check that we create a psy_init() routine that sets-up the OpenCL environment. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) expected = ( " SUBROUTINE psy_init()\n" " USE fortcl, ONLY: ocl_env_init, add_kernels\n" " CHARACTER(LEN=30) kernel_names(1)\n" " LOGICAL, save :: initialised=.False.\n" " ! Check to make sure we only execute this routine once\n" " IF (.not. initialised) THEN\n" " initialised = .True.\n" " ! Initialise the OpenCL environment/device\n" " CALL ocl_env_init\n" " ! The kernels this PSy layer module requires\n" " kernel_names(1) = \"compute_cu_code\"\n" " ! Create the OpenCL kernel objects. Expects to find all of " "the compiled\n" " ! kernels in PSYCLONE_KERNELS_FILE.\n" " CALL add_kernels(1, kernel_names)\n" " END IF \n" " END SUBROUTINE psy_init\n") assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_opencl_kernel_output_file(kernel_outputdir): '''Check that an OpenCL file named modulename_kernelname_0 is generated. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Create dummy boundary symbols for the "name" kernel sched.symbol_table.new_symbol("a", tag="xstart_name", symbol_type=DataSymbol, datatype=INTEGER_TYPE) sched.symbol_table.new_symbol("b", tag="xstop_name", symbol_type=DataSymbol, datatype=INTEGER_TYPE) sched.symbol_table.new_symbol("c", tag="ystart_name", symbol_type=DataSymbol, datatype=INTEGER_TYPE) sched.symbol_table.new_symbol("d", tag="ystop_name", symbol_type=DataSymbol, datatype=INTEGER_TYPE) otrans = OCLTrans() otrans.apply(sched) sched.kernels()[0].name = "name" _ = psy.gen # Generates the OpenCL kernels as a side-effect. assert os.path.exists( os.path.join(str(kernel_outputdir), "compute_cu_name_0.cl"))
def test_set_kern_args(kernel_outputdir): ''' Check that we generate the necessary code to set kernel arguments. ''' psy, _ = get_invoke("single_invoke_two_kernels.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) # Check we've only generated one set-args routine with arguments: # kernel object + kernel arguments + boundary values assert generated_code.count("SUBROUTINE compute_cu_code_set_args(" "kernel_obj, cu_fld, p_fld, u_fld, xstart, " "xstop, ystart, ystop)") == 1 # Declarations expected = '''\ USE clfortran, ONLY: clSetKernelArg USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t USE ocl_utils_mod, ONLY: check_status INTEGER(KIND=c_intptr_t), intent(in), target :: cu_fld, p_fld, u_fld INTEGER, intent(in), target :: xstart, xstop, ystart, ystop INTEGER ierr INTEGER(KIND=c_intptr_t), target :: kernel_obj''' assert expected in generated_code expected = '''\ ! Set the arguments for the compute_cu_code OpenCL Kernel ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(cu_fld), C_LOC(cu_fld)) CALL check_status('clSetKernelArg: arg 0 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(p_fld), C_LOC(p_fld)) CALL check_status('clSetKernelArg: arg 1 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(u_fld), C_LOC(u_fld)) CALL check_status('clSetKernelArg: arg 2 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(xstart), C_LOC(xstart)) CALL check_status('clSetKernelArg: arg 3 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 4, C_SIZEOF(xstop), C_LOC(xstop)) CALL check_status('clSetKernelArg: arg 4 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 5, C_SIZEOF(ystart), C_LOC(ystart)) CALL check_status('clSetKernelArg: arg 5 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 6, C_SIZEOF(ystop), C_LOC(ystop)) CALL check_status('clSetKernelArg: arg 6 of compute_cu_code', ierr) END SUBROUTINE compute_cu_code_set_args''' assert expected in generated_code # The call to the set_args matches the expected kernel signature with # the boundary values converted to 0-indexing assert ("CALL compute_cu_code_set_args(kernel_compute_cu_code, " "cu_fld%device_ptr, p_fld%device_ptr, u_fld%device_ptr, " "xstart - 1, xstop - 1, " "ystart - 1, ystop - 1)" in generated_code) # There is also only one version of the set_args for the second kernel assert generated_code.count("SUBROUTINE time_smooth_code_set_args(" "kernel_obj, u_fld, unew_fld, uold_fld, " "xstart_1, xstop_1, ystart_1, ystop_1)") == 1 assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_field_arguments(kernel_outputdir): # pylint: disable=unused-argument ''' Test that with an invoke transformed to OpenCL that has fields, all the fields are initialized into OpenCL buffers, the data is copied in, and a function to get the data back from the device is also generated.''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen).lower() # The array size expression always uses the same field, in this case p_fld size_expression = "int(p_fld%grid%nx*p_fld%grid%ny, 8)" # For each of the invoke fields, add a conditional block that: # - Creates a OpenCL rw buffer. # - Writes the field data into the buffer. # - Marks data_on_device flag as true. # - Points the read_from_device_f attribute to the read_from_device # local function. # - Blocks OpenCL Queue until the data copy has finished. single_invoke_fields = ["cu_fld", "p_fld", "u_fld"] for field in single_invoke_fields: expected = ( " if (.not. {0}%data_on_device) then\n" " size_in_bytes = " + size_expression + "*c_sizeof({0}%data(1,1))\n" " ! create buffer on device\n" " {0}%device_ptr = create_rw_buffer(size_in_bytes)\n" " ierr = clenqueuewritebuffer(cmd_queues(1), " "{0}%device_ptr, cl_true, 0_8, size_in_bytes, " "c_loc({0}%data), 0, c_null_ptr, " "c_loc(write_event))\n" " {0}%data_on_device = .true.\n" " {0}%read_from_device_f => read_from_device\n" " ! block until data copies have finished\n" " ierr = clfinish(cmd_queues(1))\n" " end if\n").format(field) assert expected in generated_code # Check that the read_from_device routine has also been generated. expected = (" subroutine read_from_device(from, to, nx, ny, width)\n" " use iso_c_binding, only: c_intptr_t\n" " use fortcl, only: read_buffer\n" " integer(kind=c_intptr_t), intent(in) :: from\n" " real(kind=go_wp), intent(inout), dimension(:,:) :: to\n" " integer, intent(in) :: nx, ny, width\n" " call read_buffer(from, to, int(width*ny, kind=8))\n" " end subroutine read_from_device\n") assert expected in generated_code
def test_opencl_kernel_with_use(): ''' Check that we refuse to transform a Schedule to use OpenCL if any of the kernels use module data. ''' psy, _ = get_invoke("single_invoke_kern_with_use.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() with pytest.raises(TransformationError) as err: otrans.apply(sched) assert ("'kernel_with_use_code' contains the following symbols with " "'global' scope: ['rdt']. An OpenCL kernel cannot call other " "kernels and all of the data" in str(err.value))
def test_opencl_kernel_with_use(kernel_outputdir): ''' Check that we refuse to transform a Schedule to use OpenCL if any of the kernels use module data. ''' from psyclone.transformations import TransformationError psy, _ = get_invoke("single_invoke_kern_with_use.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() with pytest.raises(TransformationError) as err: otrans.apply(sched) assert ("'kernel_with_use_code' contains the following symbols with " "'global' scope: ['rdt']. PSyclone cannot currently" in str(err))
def test_set_arg_const_scalar(): ''' Check that an invoke that passes a scalar kernel argument by value is rejected. (We haven't yet implemented the necessary code for setting the value of such an argument in OpenCL.) ''' psy, _ = get_invoke("test00.1_invoke_kernel_using_const_scalar.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() with pytest.raises(NotImplementedError) as err: otrans.apply(sched) assert ("Cannot generate OpenCL for Invokes that contain kernels with " "arguments passed by value" in str(err))
def test_opencl_kernel_output_file_with_suffix(kernel_outputdir): '''Check that an OpenCL file named modulename_kernelname_0 is generated without the _code suffix in the kernelname ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) _ = psy.gen # Generates the OpenCL kernels as a side-effect. assert os.path.exists( os.path.join(str(kernel_outputdir), "compute_cu_compute_cu_0.cl"))
def test_opencl_kernel_output_file(kernel_outputdir): '''Check that an OpenCL file named modulename_kernelname_0 is generated. ''' import os psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) sched.kernels()[0].name = "name" _ = psy.gen # Generates the OpenCL kernels as a side-effect. assert os.path.exists( os.path.join(str(kernel_outputdir), "compute_cu_name_0.cl"))
def test_grid_proprty(kernel_outputdir): # pylint: disable=unused-argument ''' Test that using nx and ny from the gocean property dictionary works.''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen).lower() assert "globalsize = (/p_fld%grid%nx, p_fld%grid%ny/)" in generated_code expected = "size_in_bytes = int(p_fld%grid%nx*p_fld%grid%ny, 8)*" \ "c_sizeof(p_fld%data(1,1))" assert expected in generated_code
def test_set_kern_float_arg(): ''' Check that we generate correct code to set a real, scalar kernel argument. ''' psy, _ = get_invoke("single_invoke_scalar_float_arg.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) # This set_args has a name clash on xstop (one is a grid property and the # other a loop boundary). One of they should appear as 'xstop_1' expected = '''\ SUBROUTINE bc_ssh_code_set_args(kernel_obj, a_scalar, ssh_fld, xstop, \ tmask, xstart, xstop_1, ystart, ystop) USE clfortran, ONLY: clSetKernelArg USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t USE ocl_utils_mod, ONLY: check_status INTEGER(KIND=c_intptr_t), intent(in), target :: ssh_fld, tmask INTEGER, intent(in), target :: xstop REAL(KIND=go_wp), intent(in), target :: a_scalar INTEGER, intent(in), target :: xstart, xstop_1, ystart, ystop INTEGER ierr INTEGER(KIND=c_intptr_t), target :: kernel_obj ''' assert expected in generated_code expected = '''\ ! Set the arguments for the bc_ssh_code OpenCL Kernel ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(a_scalar), C_LOC(a_scalar)) CALL check_status('clSetKernelArg: arg 0 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(ssh_fld), C_LOC(ssh_fld)) CALL check_status('clSetKernelArg: arg 1 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(xstop), C_LOC(xstop)) CALL check_status('clSetKernelArg: arg 2 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(tmask), C_LOC(tmask)) CALL check_status('clSetKernelArg: arg 3 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 4, C_SIZEOF(xstart), C_LOC(xstart)) CALL check_status('clSetKernelArg: arg 4 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 5, C_SIZEOF(xstop_1), C_LOC(xstop_1)) CALL check_status('clSetKernelArg: arg 5 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 6, C_SIZEOF(ystart), C_LOC(ystart)) CALL check_status('clSetKernelArg: arg 6 of bc_ssh_code', ierr) ierr = clSetKernelArg(kernel_obj, 7, C_SIZEOF(ystop), C_LOC(ystop)) CALL check_status('clSetKernelArg: arg 7 of bc_ssh_code', ierr) END SUBROUTINE bc_ssh_code_set_args''' assert expected in generated_code
def test_use_stmts(): ''' Test that generating code for OpenCL results in the correct module use statements. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen).lower() expected = '''\ subroutine invoke_0_compute_cu(cu_fld, p_fld, u_fld) use fortcl, only: create_rw_buffer use fortcl, only: get_num_cmd_queues, get_cmd_queues, get_kernel_by_name use clfortran use iso_c_binding''' assert expected in generated_code assert "if (first_time) then" in generated_code
def test_opencl_options_effects(): ''' Check that the OpenCL options produce the expected changes in the PSy layer. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) # By default there is 1 queue, with an end barrier and local_size is 64 assert "localsize = (/64, 1/)" in generated_code assert "ierr = clEnqueueNDRangeKernel(cmd_queues(1), " \ "kernel_compute_cu_code, 2, C_NULL_PTR, C_LOC(globalsize), " \ "C_LOC(localsize), 0, C_NULL_PTR, C_NULL_PTR)" in generated_code assert "! Block until all kernels have finished\n" \ " ierr = clFinish(cmd_queues(1))" in generated_code assert "ierr = clFinish(cmd_queues(2))" not in generated_code # Change kernel local_size to 4 sched.coded_kernels()[0].set_opencl_options({'local_size': 4}) generated_code = str(psy.gen) assert "localsize = (/4, 1/)" in generated_code # Change kernel queue to 2 (the barrier should then also go up to 2) sched.coded_kernels()[0].set_opencl_options({'queue_number': 2}) generated_code = str(psy.gen) assert "ierr = clEnqueueNDRangeKernel(cmd_queues(2), " \ "kernel_compute_cu_code, 2, C_NULL_PTR, C_LOC(globalsize), " \ "C_LOC(localsize), 0, C_NULL_PTR, C_NULL_PTR)" in generated_code assert "! Block until all kernels have finished\n" \ " ierr = clFinish(cmd_queues(1))\n" \ " ierr = clFinish(cmd_queues(2))\n" in generated_code assert "ierr = clFinish(cmd_queues(3))" not in generated_code # Remove barrier at the end of the Invoke otrans.apply(sched, options={'end_barrier': False}) generated_code = str(psy.gen) assert "! Block until all kernels have finished" not in generated_code assert "ierr = clFinish(cmd_queues(2))" not in generated_code
def test_opencl_options_validation(): ''' Check that OpenCL options which are not supported provide appropiate errors. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() # Unsupported options are not accepted with pytest.raises(TransformationError) as err: otrans.apply(sched, options={'unsupported': 1}) assert "InvokeSchedule does not support the OpenCL option 'unsupported'." \ in str(err.value) # end_barrier option must be a boolean with pytest.raises(TransformationError) as err: otrans.apply(sched, options={'end_barrier': 1}) assert "InvokeSchedule OpenCL option 'end_barrier' should be a boolean." \ in str(err.value) # enable_profiling option must be a boolean with pytest.raises(TransformationError) as err: otrans.apply(sched, options={'enable_profiling': 1}) assert ("InvokeSchedule OpenCL option 'enable_profiling' should be a " "boolean." in str(err.value)) # out_of_order option must be a boolean with pytest.raises(TransformationError) as err: otrans.apply(sched, options={'out_of_order': 1}) assert "InvokeSchedule OpenCL option 'out_of_order' should be a boolean." \ in str(err.value) # Unsupported kernel options are not accepted with pytest.raises(AttributeError) as err: sched.coded_kernels()[0].set_opencl_options({'unsupported': 1}) assert "CodedKern does not support the OpenCL option 'unsupported'." \ in str(err.value) # local_size must be an integer with pytest.raises(TypeError) as err: sched.coded_kernels()[0].set_opencl_options({'local_size': 'a'}) assert "CodedKern OpenCL option 'local_size' should be an integer." \ in str(err.value) # queue_number must be an integer with pytest.raises(TypeError) as err: sched.coded_kernels()[0].set_opencl_options({'queue_number': 'a'}) assert "CodedKern OpenCL option 'queue_number' should be an integer." \ in str(err.value)
def test_set_kern_args_real_grid_property(): ''' Check that we generate correct code to set a real scalar grid property. ''' psy, _ = get_invoke("driver_test.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) expected = '''\ SUBROUTINE compute_kernel_code_set_args(kernel_obj, out_fld, in_out_fld, \ in_fld, dx, dx, gphiu) USE clfortran, ONLY: clSetKernelArg USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t USE ocl_utils_mod, ONLY: check_status INTEGER(KIND=c_intptr_t), intent(in), target :: out_fld, in_out_fld, \ in_fld, dx, gphiu REAL(KIND=go_wp), intent(in), target :: dx''' assert expected in generated_code
def test_opencl_kernel_output_file_with_suffix(kernel_outputdir): '''Check that an OpenCL file named modulename_kernelname_0 is generated without the _code suffix in the kernelname ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) _ = psy.gen # Generates the OpenCL kernels as a side-effect. assert os.path.exists( os.path.join(str(kernel_outputdir), "compute_cu_compute_cu_0.cl"))
def test_invoke_use_stmts_and_decls(kernel_outputdir, monkeypatch, debug_mode): ''' Test that generating code for OpenCL results in the correct module use statements and declarations. ''' api_config = Config.get().api_conf("gocean1.0") monkeypatch.setattr(api_config, "_debug_mode", debug_mode) psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen).lower() expected = '''\ subroutine invoke_0_compute_cu(cu_fld, p_fld, u_fld) use fortcl, only: create_rw_buffer\n''' # When in debug mode, import also the check_status function if debug_mode: expected += " use ocl_utils_mod, only: check_status\n" expected += '''\ use fortcl, only: get_num_cmd_queues, get_cmd_queues, get_kernel_by_name use clfortran use iso_c_binding type(r2d_field), intent(inout), target :: cu_fld, p_fld, u_fld integer xstart, xstop, ystart, ystop integer(kind=c_size_t), target :: localsize(2) integer(kind=c_size_t), target :: globalsize(2) integer(kind=c_intptr_t), target :: write_event integer(kind=c_size_t) size_in_bytes integer(kind=c_intptr_t), target, save :: kernel_compute_cu_code logical, save :: first_time=.true. integer ierr integer(kind=c_intptr_t), pointer, save :: cmd_queues(:) integer, save :: num_cmd_queues ''' assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_grid_proprty(kernel_outputdir): # pylint: disable=unused-argument ''' Test that using nx and ny from the gocean property dictionary works.''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen).lower() assert "globalsize = (/p_fld%grid%nx, p_fld%grid%ny/)" in generated_code expected = "size_in_bytes = int(p_fld%grid%nx*p_fld%grid%ny, 8)*" \ "c_sizeof(p_fld%data(1,1))" assert expected in generated_code
def test_invoke_opencl_initialisation(kernel_outputdir): ''' Test that generating code for OpenCL results in the correct OpenCL first time initialisation code ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen).lower() # Test that the necessary variables are declared at the beginning # of the invoke expected = '''\ integer(kind=c_size_t), target :: localsize(2) integer(kind=c_size_t), target :: globalsize(2) integer(kind=c_intptr_t), target :: write_event integer(kind=c_size_t) size_in_bytes integer(kind=c_intptr_t), target, save :: kernel_compute_cu_code logical, save :: first_time=.true. integer ierr integer(kind=c_intptr_t), pointer, save :: cmd_queues(:) integer, save :: num_cmd_queues''' assert expected in generated_code # Test that a conditional 'first_time' code is generated with the # expected initialisation statements expected = '''\ if (first_time) then first_time = .false. ! ensure opencl run-time is initialised for this psy-layer module call psy_init num_cmd_queues = get_num_cmd_queues() cmd_queues => get_cmd_queues() kernel_compute_cu_code = get_kernel_by_name("compute_cu_code") end if''' assert expected in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def trans(psy): ''' Transformation routine for use with PSyclone. Applies the OpenCL transform to the first Invoke in the psy object. :param psy: the PSy object which this script will transform. :type psy: :py:class:`psyclone.psyGen.PSy` :returns: the transformed PSy object. :rtype: :py:class:`psyclone.psyGen.PSy` ''' from psyclone.transformations import OCLTrans # Get the Schedule associated with the first Invoke invoke = psy.invokes.invoke_list[0] sched = invoke.schedule # Transform the Schedule cltrans = OCLTrans() cltrans.apply(sched) return psy
def test_psy_init_with_options(kernel_outputdir): ''' Check that we create a psy_init() routine that sets-up the OpenCL environment with the provided non-default options. ''' psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in sched.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(sched, options={ "end_barrier": True, "enable_profiling": True, "out_of_order": True }) generated_code = str(psy.gen) assert "CALL ocl_env_init(1, ocl_device_num, .True., .True.)\n" \ in generated_code assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_opencl_options_validation(kernel_outputdir): ''' Check that OpenCL options which are not supported provide appropiate errors. ''' from psyclone.transformations import TransformationError psy, _ = get_invoke("single_invoke.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() # Unsupported options are not accepted with pytest.raises(TransformationError) as err: otrans.apply(sched, options={'unsupported': 1}) assert "InvokeSchedule does not support the opencl_option 'unsupported'." \ in str(err) # end_barrier option must be a boolean with pytest.raises(TransformationError) as err: otrans.apply(sched, options={'end_barrier': 1}) assert "InvokeSchedule opencl_option 'end_barrier' should be a boolean." \ in str(err) # Unsupported kernel options are not accepted with pytest.raises(AttributeError) as err: sched.coded_kernels()[0].set_opencl_options({'unsupported': 1}) assert "CodedKern does not support the opencl_option 'unsupported'." \ in str(err) # local_size must be an integer with pytest.raises(TypeError) as err: sched.coded_kernels()[0].set_opencl_options({'local_size': 'a'}) assert "CodedKern opencl_option 'local_size' should be an integer." \ in str(err) # queue_number must be an integer with pytest.raises(TypeError) as err: sched.coded_kernels()[0].set_opencl_options({'queue_number': 'a'}) assert "CodedKern opencl_option 'queue_number' should be an integer." \ in str(err)
def test_set_kern_args(kernel_outputdir): ''' Check that we generate the necessary code to set kernel arguments. ''' psy, _ = get_invoke("single_invoke_two_kernels.f90", API, idx=0) sched = psy.invokes.invoke_list[0].schedule otrans = OCLTrans() otrans.apply(sched) generated_code = str(psy.gen) # Check we've only generated one set-args routine assert generated_code.count("SUBROUTINE compute_cu_code_set_args(" "kernel_obj, nx, cu_fld, p_fld, u_fld)") == 1 # Declarations expected = '''\ SUBROUTINE compute_cu_code_set_args(kernel_obj, nx, cu_fld, p_fld, u_fld) USE clfortran, ONLY: clSetKernelArg USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t USE ocl_utils_mod, ONLY: check_status INTEGER ierr INTEGER(KIND=c_intptr_t), target :: cu_fld, p_fld, u_fld INTEGER(KIND=c_intptr_t), target :: kernel_obj''' assert expected in generated_code expected = '''\ ! Set the arguments for the compute_cu_code OpenCL Kernel ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(nx), C_LOC(nx)) ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(cu_fld), C_LOC(cu_fld)) CALL check_status('clSetKernelArg: arg 1 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(p_fld), C_LOC(p_fld)) CALL check_status('clSetKernelArg: arg 2 of compute_cu_code', ierr) ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(u_fld), C_LOC(u_fld)) CALL check_status('clSetKernelArg: arg 3 of compute_cu_code', ierr) END SUBROUTINE compute_cu_code_set_args''' assert expected in generated_code assert generated_code.count("SUBROUTINE time_smooth_code_set_args(" "kernel_obj, nx, u_fld, " "unew_fld, uold_fld)") == 1 assert ("CALL compute_cu_code_set_args(kernel_compute_cu_code, " "p_fld%grid%nx, cu_fld%device_ptr, p_fld%device_ptr, " "u_fld%device_ptr)" in generated_code) assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
def test_opencl_multi_invoke_options_validation(option_to_check): ''' Check that the OpenCL options constrains are enforced when there are multiple invokes. ''' psy, _ = get_invoke("test12_two_invokes_two_kernels.f90", API, idx=0) invoke1_schedule = psy.invokes.invoke_list[0].schedule invoke2_schedule = psy.invokes.invoke_list[1].schedule # Currently, moving the boundaries inside the kernel is a prerequisite # for the GOcean gen_ocl() code generation. trans = GOMoveIterationBoundariesInsideKernelTrans() for kernel in invoke1_schedule.coded_kernels(): trans.apply(kernel) for kernel in invoke2_schedule.coded_kernels(): trans.apply(kernel) otrans = OCLTrans() otrans.apply(invoke1_schedule, options={option_to_check: False}) otrans.apply(invoke2_schedule, options={option_to_check: True}) with pytest.raises(NotImplementedError) as err: _ = str(psy.gen) assert ("The current implementation creates a single OpenCL context for " "all the invokes which needs certain OpenCL options to match " "between invokes. Found '{0}' with unmatching values between " "invokes.".format(option_to_check) in str(err.value))