예제 #1
0
def test_opencl_kernel_output_file(kernel_outputdir):
    '''Check that an OpenCL file named modulename_kernelname_0 is generated.
    '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Create dummy boundary symbols for the "name" kernel
    sched.symbol_table.new_symbol("a",
                                  tag="xstart_name",
                                  symbol_type=DataSymbol,
                                  datatype=INTEGER_TYPE)
    sched.symbol_table.new_symbol("b",
                                  tag="xstop_name",
                                  symbol_type=DataSymbol,
                                  datatype=INTEGER_TYPE)
    sched.symbol_table.new_symbol("c",
                                  tag="ystart_name",
                                  symbol_type=DataSymbol,
                                  datatype=INTEGER_TYPE)
    sched.symbol_table.new_symbol("d",
                                  tag="ystop_name",
                                  symbol_type=DataSymbol,
                                  datatype=INTEGER_TYPE)

    otrans = OCLTrans()
    otrans.apply(sched)
    sched.kernels()[0].name = "name"
    _ = psy.gen  # Generates the OpenCL kernels as a side-effect.

    assert os.path.exists(
        os.path.join(str(kernel_outputdir), "compute_cu_name_0.cl"))
예제 #2
0
def test_psy_init(kernel_outputdir):
    ''' Check that we create a psy_init() routine that sets-up the
    OpenCL environment. '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen)
    expected = (
        "    SUBROUTINE psy_init()\n"
        "      USE fortcl, ONLY: ocl_env_init, add_kernels\n"
        "      CHARACTER(LEN=30) kernel_names(1)\n"
        "      LOGICAL, save :: initialised=.False.\n"
        "      ! Check to make sure we only execute this routine once\n"
        "      IF (.not. initialised) THEN\n"
        "        initialised = .True.\n"
        "        ! Initialise the OpenCL environment/device\n"
        "        CALL ocl_env_init\n"
        "        ! The kernels this PSy layer module requires\n"
        "        kernel_names(1) = \"compute_cu_code\"\n"
        "        ! Create the OpenCL kernel objects. Expects to find all of "
        "the compiled\n"
        "        ! kernels in PSYCLONE_KERNELS_FILE.\n"
        "        CALL add_kernels(1, kernel_names)\n"
        "      END IF \n"
        "    END SUBROUTINE psy_init\n")

    assert expected in generated_code
    assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
예제 #3
0
def test_set_kern_float_arg(kernel_outputdir):
    ''' Check that we generate correct code to set a real, scalar kernel
    argument. '''
    psy, _ = get_invoke("single_invoke_scalar_float_arg.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen)
    expected = '''\
    SUBROUTINE bc_ssh_code_set_args(kernel_obj, a_scalar, ssh_fld, ''' + \
        '''xstop, tmask)
      USE clfortran, ONLY: clSetKernelArg
      USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t
      USE ocl_utils_mod, ONLY: check_status
      INTEGER(KIND=c_intptr_t), intent(in), target :: ssh_fld, tmask
      INTEGER, intent(in), target :: xstop
      REAL(KIND=go_wp), intent(in), target :: a_scalar
      INTEGER ierr
      INTEGER(KIND=c_intptr_t), target :: kernel_obj
'''
    assert expected in generated_code
    expected = '''\
      ! Set the arguments for the bc_ssh_code OpenCL Kernel
      ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(a_scalar), C_LOC(a_scalar))
      CALL check_status('clSetKernelArg: arg 0 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(ssh_fld), C_LOC(ssh_fld))
      CALL check_status('clSetKernelArg: arg 1 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(xstop), C_LOC(xstop))
      CALL check_status('clSetKernelArg: arg 2 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(tmask), C_LOC(tmask))
      CALL check_status('clSetKernelArg: arg 3 of bc_ssh_code', ierr)
    END SUBROUTINE bc_ssh_code_set_args'''
    assert expected in generated_code
    assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
예제 #4
0
def test_set_kern_args_real_grid_property():
    ''' Check that we generate correct code to set a real scalar grid
    property. '''
    psy, _ = get_invoke("driver_test.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen)
    expected = '''\
    SUBROUTINE compute_kernel_code_set_args(kernel_obj, out_fld, in_out_fld, \
in_fld, dx, dx_1, gphiu, xstart, xstop, ystart, ystop)
      USE clfortran, ONLY: clSetKernelArg
      USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t
      USE ocl_utils_mod, ONLY: check_status
      INTEGER(KIND=c_intptr_t), intent(in), target :: out_fld, in_out_fld, \
in_fld, dx, gphiu
      REAL(KIND=go_wp), intent(in), target :: dx_1
      INTEGER, intent(in), target :: xstart, xstop, ystart, ystop'''
    assert expected in generated_code
예제 #5
0
def trans(psy):
    '''
    Transformation routine for use with PSyclone. Applies the OpenCL
    transform to the first Invoke in the psy object.

    :param psy: the PSy object which this script will transform.
    :type psy: :py:class:`psyclone.psyGen.PSy`
    :returns: the transformed PSy object.
    :rtype: :py:class:`psyclone.psyGen.PSy`

    '''
    from psyclone.transformations import OCLTrans, KernelGlobalsToArguments

    # Get the Schedule associated with the first Invoke
    invoke = psy.invokes.invoke_list[0]
    sched = invoke.schedule

    # Convert any kernel accesses to global data into arguments
    ktrans = KernelGlobalsToArguments()
    for kern in sched.kernels():
        ktrans.apply(kern)

    # Transform the Schedule
    cltrans = OCLTrans()
    cltrans.apply(sched, options={"end_barrier": True})

    # Provide kernel-specific OpenCL optimization options
    for kern in sched.kernels():
        kern.set_opencl_options({"queue_number": 1, 'local_size': 4})

    return psy
예제 #6
0
def trans(psy):
    '''
    Transformation routine for use with PSyclone. Applies the OpenCL
    transform to the first Invoke in the psy object.

    :param psy: the PSy object which this script will transform.
    :type psy: :py:class:`psyclone.psyGen.PSy`
    :returns: the transformed PSy object.
    :rtype: :py:class:`psyclone.psyGen.PSy`

    '''
    ocl_trans = OCLTrans()
    fold_trans = FoldConditionalReturnExpressionsTrans()
    move_boundaries_trans = GOMoveIterationBoundariesInsideKernelTrans()

    # Get the Schedule associated with the first Invoke
    invoke = psy.invokes.invoke_list[0]
    sched = invoke.schedule

    # Transform the Schedule
    ocl_trans.apply(sched, options={"end_barrier": True})

    # Provide kernel-specific OpenCL optimization options
    for kern in sched.kernels():
        # Move the PSy-layer loop boundaries inside the kernel as a kernel
        # mask, this allows to iterate through the whole domain
        move_boundaries_trans.apply(kern)
        # Change the syntax to remove the return statements introduced by the
        # previous transformation
        fold_trans.apply(kern.get_kernel_schedule())
        # Specify the OpenCL queue and workgroup size of the kernel
        kern.set_opencl_options({"queue_number": 1, 'local_size': 4})

    return psy
예제 #7
0
def test_opencl_kernel_missing_boundary_symbol():
    '''Check that an OpenCL file named modulename_kernelname_0 is generated.
    '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule

    # Create dummy boundary symbols for the "name" kernel with one missing
    # symbol
    sched.symbol_table.new_symbol("a",
                                  tag="xstart_name",
                                  symbol_type=DataSymbol,
                                  datatype=INTEGER_TYPE)
    sched.symbol_table.new_symbol("c",
                                  tag="ystart_name",
                                  symbol_type=DataSymbol,
                                  datatype=INTEGER_TYPE)
    sched.symbol_table.new_symbol("d",
                                  tag="ystop_name",
                                  symbol_type=DataSymbol,
                                  datatype=INTEGER_TYPE)

    otrans = OCLTrans()
    otrans.apply(sched)
    sched.kernels()[0].name = "name"

    with pytest.raises(GenerationError) as err:
        _ = psy.gen  # Generates the OpenCL kernels as a side-effect.
    assert ("Boundary symbol tag 'xstop_name' not found while generating the "
            "OpenCL code for kernel 'name'. Make sure to apply the "
            "GOMoveIterationBoundariesInsideKernelTrans before attempting the"
            " OpenCL code generation." in str(err.value))
예제 #8
0
def trans(psy):
    '''
    Transformation routine for use with PSyclone. Applies the OpenCL
    transform to the first Invoke in the psy object.

    :param psy: the PSy object which this script will transform.
    :type psy: :py:class:`psyclone.psyGen.PSy`
    :returns: the transformed PSy object.
    :rtype: :py:class:`psyclone.psyGen.PSy`

    '''

    # Get the Schedule associated with the first Invoke
    invoke = psy.invokes.invoke_list[0]
    sched = invoke.schedule

    # Convert any kernel accesses to global data into arguments
    ktrans = KernelGlobalsToArguments()
    for kern in sched.kernels():
        ktrans.apply(kern)

    # Transform the Schedule
    cltrans = OCLTrans()
    cltrans.apply(sched, options={"end_barrier": True})

    # Provide kernel-specific OpenCL optimization options
    move_boundaries_trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kern in sched.kernels():
        # Move the PSy-layer loop boundaries inside the kernel as a kernel
        # mask, this allows to iterate through the whole domain
        move_boundaries_trans.apply(kern)
        # Specify the OpenCL queue and workgroup size of the kernel
        kern.set_opencl_options({"queue_number": 1, 'local_size': 4})

    return psy
예제 #9
0
def test_set_kern_args(kernel_outputdir):
    ''' Check that we generate the necessary code to set kernel arguments. '''
    psy, _ = get_invoke("single_invoke_two_kernels.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen)
    # Check we've only generated one set-args routine with arguments:
    # kernel object + kernel arguments + boundary values
    assert generated_code.count("SUBROUTINE compute_cu_code_set_args("
                                "kernel_obj, cu_fld, p_fld, u_fld, xstart, "
                                "xstop, ystart, ystop)") == 1
    # Declarations
    expected = '''\
      USE clfortran, ONLY: clSetKernelArg
      USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t
      USE ocl_utils_mod, ONLY: check_status
      INTEGER(KIND=c_intptr_t), intent(in), target :: cu_fld, p_fld, u_fld
      INTEGER, intent(in), target :: xstart, xstop, ystart, ystop
      INTEGER ierr
      INTEGER(KIND=c_intptr_t), target :: kernel_obj'''
    assert expected in generated_code
    expected = '''\
      ! Set the arguments for the compute_cu_code OpenCL Kernel
      ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(cu_fld), C_LOC(cu_fld))
      CALL check_status('clSetKernelArg: arg 0 of compute_cu_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(p_fld), C_LOC(p_fld))
      CALL check_status('clSetKernelArg: arg 1 of compute_cu_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(u_fld), C_LOC(u_fld))
      CALL check_status('clSetKernelArg: arg 2 of compute_cu_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(xstart), C_LOC(xstart))
      CALL check_status('clSetKernelArg: arg 3 of compute_cu_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 4, C_SIZEOF(xstop), C_LOC(xstop))
      CALL check_status('clSetKernelArg: arg 4 of compute_cu_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 5, C_SIZEOF(ystart), C_LOC(ystart))
      CALL check_status('clSetKernelArg: arg 5 of compute_cu_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 6, C_SIZEOF(ystop), C_LOC(ystop))
      CALL check_status('clSetKernelArg: arg 6 of compute_cu_code', ierr)
    END SUBROUTINE compute_cu_code_set_args'''
    assert expected in generated_code

    # The call to the set_args matches the expected kernel signature with
    # the boundary values converted to 0-indexing
    assert ("CALL compute_cu_code_set_args(kernel_compute_cu_code, "
            "cu_fld%device_ptr, p_fld%device_ptr, u_fld%device_ptr, "
            "xstart - 1, xstop - 1, "
            "ystart - 1, ystop - 1)" in generated_code)

    # There is also only one version of the set_args for the second kernel
    assert generated_code.count("SUBROUTINE time_smooth_code_set_args("
                                "kernel_obj, u_fld, unew_fld, uold_fld, "
                                "xstart_1, xstop_1, ystart_1, ystop_1)") == 1
    assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
예제 #10
0
def test_field_arguments(kernel_outputdir):
    # pylint: disable=unused-argument
    ''' Test that with an invoke transformed to OpenCL that has fields,
    all the fields are initialized into OpenCL buffers, the data is copied
    in, and a function to get the data back from the device is also
    generated.'''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen).lower()

    # The array size expression always uses the same field, in this case p_fld
    size_expression = "int(p_fld%grid%nx*p_fld%grid%ny, 8)"

    # For each of the invoke fields, add a conditional block that:
    # - Creates a OpenCL rw buffer.
    # - Writes the field data into the buffer.
    # - Marks data_on_device flag as true.
    # - Points the read_from_device_f attribute to the read_from_device
    # local function.
    # - Blocks OpenCL Queue until the data copy has finished.
    single_invoke_fields = ["cu_fld", "p_fld", "u_fld"]
    for field in single_invoke_fields:
        expected = (
            "      if (.not. {0}%data_on_device) then\n"
            "        size_in_bytes = " + size_expression +
            "*c_sizeof({0}%data(1,1))\n"
            "        ! create buffer on device\n"
            "        {0}%device_ptr = create_rw_buffer(size_in_bytes)\n"
            "        ierr = clenqueuewritebuffer(cmd_queues(1), "
            "{0}%device_ptr, cl_true, 0_8, size_in_bytes, "
            "c_loc({0}%data), 0, c_null_ptr, "
            "c_loc(write_event))\n"
            "        {0}%data_on_device = .true.\n"
            "        {0}%read_from_device_f => read_from_device\n"
            "        ! block until data copies have finished\n"
            "        ierr = clfinish(cmd_queues(1))\n"
            "      end if\n").format(field)
        assert expected in generated_code

    # Check that the read_from_device routine has also been generated.
    expected = ("    subroutine read_from_device(from, to, nx, ny, width)\n"
                "      use iso_c_binding, only: c_intptr_t\n"
                "      use fortcl, only: read_buffer\n"
                "      integer(kind=c_intptr_t), intent(in) :: from\n"
                "      real(kind=go_wp), intent(inout), dimension(:,:) :: to\n"
                "      integer, intent(in) :: nx, ny, width\n"
                "      call read_buffer(from, to, int(width*ny, kind=8))\n"
                "    end subroutine read_from_device\n")
    assert expected in generated_code
예제 #11
0
def test_opencl_options_validation():
    ''' Check that OpenCL options which are not supported provide appropiate
    errors.
    '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()

    # Unsupported options are not accepted
    with pytest.raises(TransformationError) as err:
        otrans.apply(sched, options={'unsupported': 1})
    assert "InvokeSchedule does not support the OpenCL option 'unsupported'." \
        in str(err.value)

    # end_barrier option must be a boolean
    with pytest.raises(TransformationError) as err:
        otrans.apply(sched, options={'end_barrier': 1})
    assert "InvokeSchedule OpenCL option 'end_barrier' should be a boolean." \
        in str(err.value)

    # enable_profiling option must be a boolean
    with pytest.raises(TransformationError) as err:
        otrans.apply(sched, options={'enable_profiling': 1})
    assert ("InvokeSchedule OpenCL option 'enable_profiling' should be a "
            "boolean." in str(err.value))

    # out_of_order option must be a boolean
    with pytest.raises(TransformationError) as err:
        otrans.apply(sched, options={'out_of_order': 1})
    assert "InvokeSchedule OpenCL option 'out_of_order' should be a boolean." \
        in str(err.value)

    # Unsupported kernel options are not accepted
    with pytest.raises(AttributeError) as err:
        sched.coded_kernels()[0].set_opencl_options({'unsupported': 1})
    assert "CodedKern does not support the OpenCL option 'unsupported'." \
        in str(err.value)

    # local_size must be an integer
    with pytest.raises(TypeError) as err:
        sched.coded_kernels()[0].set_opencl_options({'local_size': 'a'})
    assert "CodedKern OpenCL option 'local_size' should be an integer." \
        in str(err.value)

    # queue_number must be an integer
    with pytest.raises(TypeError) as err:
        sched.coded_kernels()[0].set_opencl_options({'queue_number': 'a'})
    assert "CodedKern OpenCL option 'queue_number' should be an integer." \
        in str(err.value)
예제 #12
0
def test_opencl_kernel_with_use(kernel_outputdir):
    ''' Check that we refuse to transform a Schedule to use OpenCL if any
    of the kernels use module data. '''
    from psyclone.transformations import TransformationError
    psy, _ = get_invoke("single_invoke_kern_with_use.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    with pytest.raises(TransformationError) as err:
        otrans.apply(sched)
    assert ("'kernel_with_use_code' contains the following symbols with "
            "'global' scope: ['rdt']. PSyclone cannot currently" in str(err))
예제 #13
0
def test_opencl_kernel_with_use():
    ''' Check that we refuse to transform a Schedule to use OpenCL if any
    of the kernels use module data. '''
    psy, _ = get_invoke("single_invoke_kern_with_use.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    with pytest.raises(TransformationError) as err:
        otrans.apply(sched)
    assert ("'kernel_with_use_code' contains the following symbols with "
            "'global' scope: ['rdt']. An OpenCL kernel cannot call other "
            "kernels and all of the data" in str(err.value))
예제 #14
0
def test_set_arg_const_scalar():
    ''' Check that an invoke that passes a scalar kernel argument by
    value is rejected. (We haven't yet implemented the necessary code for
    setting the value of such an argument in OpenCL.) '''
    psy, _ = get_invoke("test00.1_invoke_kernel_using_const_scalar.f90",
                        API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    with pytest.raises(NotImplementedError) as err:
        otrans.apply(sched)
    assert ("Cannot generate OpenCL for Invokes that contain kernels with "
            "arguments passed by value" in str(err))
예제 #15
0
def test_opencl_kernel_output_file_with_suffix(kernel_outputdir):
    '''Check that an OpenCL file named modulename_kernelname_0 is
    generated without the _code suffix in the kernelname
    '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    otrans.apply(sched)
    _ = psy.gen  # Generates the OpenCL kernels as a side-effect.

    assert os.path.exists(
        os.path.join(str(kernel_outputdir), "compute_cu_compute_cu_0.cl"))
예제 #16
0
def test_opencl_kernel_output_file(kernel_outputdir):
    '''Check that an OpenCL file named modulename_kernelname_0 is generated.
    '''
    import os
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    otrans.apply(sched)
    sched.kernels()[0].name = "name"
    _ = psy.gen  # Generates the OpenCL kernels as a side-effect.

    assert os.path.exists(
        os.path.join(str(kernel_outputdir), "compute_cu_name_0.cl"))
예제 #17
0
def test_grid_proprty(kernel_outputdir):
    # pylint: disable=unused-argument
    ''' Test that using nx and ny from the gocean property dictionary
    works.'''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen).lower()
    assert "globalsize = (/p_fld%grid%nx, p_fld%grid%ny/)" in generated_code
    expected = "size_in_bytes = int(p_fld%grid%nx*p_fld%grid%ny, 8)*" \
               "c_sizeof(p_fld%data(1,1))"
    assert expected in generated_code
예제 #18
0
def test_set_kern_float_arg():
    ''' Check that we generate correct code to set a real, scalar kernel
    argument. '''
    psy, _ = get_invoke("single_invoke_scalar_float_arg.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen)
    # This set_args has a name clash on xstop (one is a grid property and the
    # other a loop boundary). One of they should appear as 'xstop_1'
    expected = '''\
    SUBROUTINE bc_ssh_code_set_args(kernel_obj, a_scalar, ssh_fld, xstop, \
tmask, xstart, xstop_1, ystart, ystop)
      USE clfortran, ONLY: clSetKernelArg
      USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t
      USE ocl_utils_mod, ONLY: check_status
      INTEGER(KIND=c_intptr_t), intent(in), target :: ssh_fld, tmask
      INTEGER, intent(in), target :: xstop
      REAL(KIND=go_wp), intent(in), target :: a_scalar
      INTEGER, intent(in), target :: xstart, xstop_1, ystart, ystop
      INTEGER ierr
      INTEGER(KIND=c_intptr_t), target :: kernel_obj
'''
    assert expected in generated_code
    expected = '''\
      ! Set the arguments for the bc_ssh_code OpenCL Kernel
      ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(a_scalar), C_LOC(a_scalar))
      CALL check_status('clSetKernelArg: arg 0 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(ssh_fld), C_LOC(ssh_fld))
      CALL check_status('clSetKernelArg: arg 1 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(xstop), C_LOC(xstop))
      CALL check_status('clSetKernelArg: arg 2 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(tmask), C_LOC(tmask))
      CALL check_status('clSetKernelArg: arg 3 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 4, C_SIZEOF(xstart), C_LOC(xstart))
      CALL check_status('clSetKernelArg: arg 4 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 5, C_SIZEOF(xstop_1), C_LOC(xstop_1))
      CALL check_status('clSetKernelArg: arg 5 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 6, C_SIZEOF(ystart), C_LOC(ystart))
      CALL check_status('clSetKernelArg: arg 6 of bc_ssh_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 7, C_SIZEOF(ystop), C_LOC(ystop))
      CALL check_status('clSetKernelArg: arg 7 of bc_ssh_code', ierr)
    END SUBROUTINE bc_ssh_code_set_args'''
    assert expected in generated_code
예제 #19
0
def test_use_stmts():
    ''' Test that generating code for OpenCL results in the correct
    module use statements. '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen).lower()
    expected = '''\
    subroutine invoke_0_compute_cu(cu_fld, p_fld, u_fld)
      use fortcl, only: create_rw_buffer
      use fortcl, only: get_num_cmd_queues, get_cmd_queues, get_kernel_by_name
      use clfortran
      use iso_c_binding'''
    assert expected in generated_code
    assert "if (first_time) then" in generated_code
예제 #20
0
def test_opencl_kernel_output_file_with_suffix(kernel_outputdir):
    '''Check that an OpenCL file named modulename_kernelname_0 is
    generated without the _code suffix in the kernelname
    '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched)
    _ = psy.gen  # Generates the OpenCL kernels as a side-effect.

    assert os.path.exists(
        os.path.join(str(kernel_outputdir), "compute_cu_compute_cu_0.cl"))
예제 #21
0
def test_set_kern_args_real_grid_property():
    ''' Check that we generate correct code to set a real scalar grid
    property. '''
    psy, _ = get_invoke("driver_test.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen)
    expected = '''\
    SUBROUTINE compute_kernel_code_set_args(kernel_obj, out_fld, in_out_fld, \
in_fld, dx, dx, gphiu)
      USE clfortran, ONLY: clSetKernelArg
      USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t
      USE ocl_utils_mod, ONLY: check_status
      INTEGER(KIND=c_intptr_t), intent(in), target :: out_fld, in_out_fld, \
in_fld, dx, gphiu
      REAL(KIND=go_wp), intent(in), target :: dx'''
    assert expected in generated_code
예제 #22
0
def test_grid_proprty(kernel_outputdir):
    # pylint: disable=unused-argument
    ''' Test that using nx and ny from the gocean property dictionary
    works.'''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen).lower()
    assert "globalsize = (/p_fld%grid%nx, p_fld%grid%ny/)" in generated_code
    expected = "size_in_bytes = int(p_fld%grid%nx*p_fld%grid%ny, 8)*" \
               "c_sizeof(p_fld%data(1,1))"
    assert expected in generated_code
예제 #23
0
def test_invoke_use_stmts_and_decls(kernel_outputdir, monkeypatch, debug_mode):
    ''' Test that generating code for OpenCL results in the correct
    module use statements and declarations. '''
    api_config = Config.get().api_conf("gocean1.0")
    monkeypatch.setattr(api_config, "_debug_mode", debug_mode)
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule

    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen).lower()
    expected = '''\
    subroutine invoke_0_compute_cu(cu_fld, p_fld, u_fld)
      use fortcl, only: create_rw_buffer\n'''

    # When in debug mode, import also the check_status function
    if debug_mode:
        expected += "      use ocl_utils_mod, only: check_status\n"

    expected += '''\
      use fortcl, only: get_num_cmd_queues, get_cmd_queues, get_kernel_by_name
      use clfortran
      use iso_c_binding
      type(r2d_field), intent(inout), target :: cu_fld, p_fld, u_fld
      integer xstart, xstop, ystart, ystop
      integer(kind=c_size_t), target :: localsize(2)
      integer(kind=c_size_t), target :: globalsize(2)
      integer(kind=c_intptr_t), target :: write_event
      integer(kind=c_size_t) size_in_bytes
      integer(kind=c_intptr_t), target, save :: kernel_compute_cu_code
      logical, save :: first_time=.true.
      integer ierr
      integer(kind=c_intptr_t), pointer, save :: cmd_queues(:)
      integer, save :: num_cmd_queues
      '''
    assert expected in generated_code
    assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
예제 #24
0
def test_invoke_opencl_initialisation(kernel_outputdir):
    ''' Test that generating code for OpenCL results in the correct
    OpenCL first time initialisation code '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen).lower()

    # Test that the necessary variables are declared at the beginning
    # of the invoke
    expected = '''\
      integer(kind=c_size_t), target :: localsize(2)
      integer(kind=c_size_t), target :: globalsize(2)
      integer(kind=c_intptr_t), target :: write_event
      integer(kind=c_size_t) size_in_bytes
      integer(kind=c_intptr_t), target, save :: kernel_compute_cu_code
      logical, save :: first_time=.true.
      integer ierr
      integer(kind=c_intptr_t), pointer, save :: cmd_queues(:)
      integer, save :: num_cmd_queues'''
    assert expected in generated_code

    # Test that a conditional 'first_time' code is generated with the
    # expected initialisation statements
    expected = '''\
      if (first_time) then
        first_time = .false.
        ! ensure opencl run-time is initialised for this psy-layer module
        call psy_init
        num_cmd_queues = get_num_cmd_queues()
        cmd_queues => get_cmd_queues()
        kernel_compute_cu_code = get_kernel_by_name("compute_cu_code")
      end if'''
    assert expected in generated_code
    assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
예제 #25
0
def trans(psy):
    '''
    Transformation routine for use with PSyclone. Applies the OpenCL
    transform to the first Invoke in the psy object.

    :param psy: the PSy object which this script will transform.
    :type psy: :py:class:`psyclone.psyGen.PSy`
    :returns: the transformed PSy object.
    :rtype: :py:class:`psyclone.psyGen.PSy`

    '''
    from psyclone.transformations import OCLTrans

    # Get the Schedule associated with the first Invoke
    invoke = psy.invokes.invoke_list[0]
    sched = invoke.schedule

    # Transform the Schedule
    cltrans = OCLTrans()
    cltrans.apply(sched)

    return psy
예제 #26
0
def test_psy_init_with_options(kernel_outputdir):
    ''' Check that we create a psy_init() routine that sets-up the
    OpenCL environment with the provided non-default options. '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched,
                 options={
                     "end_barrier": True,
                     "enable_profiling": True,
                     "out_of_order": True
                 })
    generated_code = str(psy.gen)
    assert "CALL ocl_env_init(1, ocl_device_num, .True., .True.)\n" \
        in generated_code
    assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
예제 #27
0
def test_set_kern_args(kernel_outputdir):
    ''' Check that we generate the necessary code to set kernel arguments. '''
    psy, _ = get_invoke("single_invoke_two_kernels.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen)
    # Check we've only generated one set-args routine
    assert generated_code.count("SUBROUTINE compute_cu_code_set_args("
                                "kernel_obj, nx, cu_fld, p_fld, u_fld)") == 1
    # Declarations
    expected = '''\
    SUBROUTINE compute_cu_code_set_args(kernel_obj, nx, cu_fld, p_fld, u_fld)
      USE clfortran, ONLY: clSetKernelArg
      USE iso_c_binding, ONLY: c_sizeof, c_loc, c_intptr_t
      USE ocl_utils_mod, ONLY: check_status
      INTEGER ierr
      INTEGER(KIND=c_intptr_t), target :: cu_fld, p_fld, u_fld
      INTEGER(KIND=c_intptr_t), target :: kernel_obj'''
    assert expected in generated_code
    expected = '''\
      ! Set the arguments for the compute_cu_code OpenCL Kernel
      ierr = clSetKernelArg(kernel_obj, 0, C_SIZEOF(nx), C_LOC(nx))
      ierr = clSetKernelArg(kernel_obj, 1, C_SIZEOF(cu_fld), C_LOC(cu_fld))
      CALL check_status('clSetKernelArg: arg 1 of compute_cu_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 2, C_SIZEOF(p_fld), C_LOC(p_fld))
      CALL check_status('clSetKernelArg: arg 2 of compute_cu_code', ierr)
      ierr = clSetKernelArg(kernel_obj, 3, C_SIZEOF(u_fld), C_LOC(u_fld))
      CALL check_status('clSetKernelArg: arg 3 of compute_cu_code', ierr)
    END SUBROUTINE compute_cu_code_set_args'''
    assert expected in generated_code
    assert generated_code.count("SUBROUTINE time_smooth_code_set_args("
                                "kernel_obj, nx, u_fld, "
                                "unew_fld, uold_fld)") == 1
    assert ("CALL compute_cu_code_set_args(kernel_compute_cu_code, "
            "p_fld%grid%nx, cu_fld%device_ptr, p_fld%device_ptr, "
            "u_fld%device_ptr)" in generated_code)
    assert GOcean1p0OpenCLBuild(kernel_outputdir).code_compiles(psy)
예제 #28
0
def test_opencl_options_effects():
    ''' Check that the OpenCL options produce the expected changes in the
    PSy layer.
    '''
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in sched.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(sched)
    generated_code = str(psy.gen)

    # By default there is 1 queue, with an end barrier and local_size is 64
    assert "localsize = (/64, 1/)" in generated_code
    assert "ierr = clEnqueueNDRangeKernel(cmd_queues(1), " \
        "kernel_compute_cu_code, 2, C_NULL_PTR, C_LOC(globalsize), " \
        "C_LOC(localsize), 0, C_NULL_PTR, C_NULL_PTR)" in generated_code
    assert "! Block until all kernels have finished\n" \
        "      ierr = clFinish(cmd_queues(1))" in generated_code
    assert "ierr = clFinish(cmd_queues(2))" not in generated_code

    # Change kernel local_size to 4
    sched.coded_kernels()[0].set_opencl_options({'local_size': 4})
    generated_code = str(psy.gen)
    assert "localsize = (/4, 1/)" in generated_code

    # Change kernel queue to 2 (the barrier should then also go up to 2)
    sched.coded_kernels()[0].set_opencl_options({'queue_number': 2})
    generated_code = str(psy.gen)
    assert "ierr = clEnqueueNDRangeKernel(cmd_queues(2), " \
        "kernel_compute_cu_code, 2, C_NULL_PTR, C_LOC(globalsize), " \
        "C_LOC(localsize), 0, C_NULL_PTR, C_NULL_PTR)" in generated_code
    assert "! Block until all kernels have finished\n" \
        "      ierr = clFinish(cmd_queues(1))\n" \
        "      ierr = clFinish(cmd_queues(2))\n" in generated_code
    assert "ierr = clFinish(cmd_queues(3))" not in generated_code

    # Remove barrier at the end of the Invoke
    otrans.apply(sched, options={'end_barrier': False})
    generated_code = str(psy.gen)
    assert "! Block until all kernels have finished" not in generated_code
    assert "ierr = clFinish(cmd_queues(2))" not in generated_code
예제 #29
0
def test_opencl_options_validation(kernel_outputdir):
    ''' Check that OpenCL options which are not supported provide appropiate
    errors.
    '''
    from psyclone.transformations import TransformationError
    psy, _ = get_invoke("single_invoke.f90", API, idx=0)
    sched = psy.invokes.invoke_list[0].schedule
    otrans = OCLTrans()

    # Unsupported options are not accepted
    with pytest.raises(TransformationError) as err:
        otrans.apply(sched, options={'unsupported': 1})
    assert "InvokeSchedule does not support the opencl_option 'unsupported'." \
        in str(err)

    # end_barrier option must be a boolean
    with pytest.raises(TransformationError) as err:
        otrans.apply(sched, options={'end_barrier': 1})
    assert "InvokeSchedule opencl_option 'end_barrier' should be a boolean." \
        in str(err)

    # Unsupported kernel options are not accepted
    with pytest.raises(AttributeError) as err:
        sched.coded_kernels()[0].set_opencl_options({'unsupported': 1})
    assert "CodedKern does not support the opencl_option 'unsupported'." \
        in str(err)

    # local_size must be an integer
    with pytest.raises(TypeError) as err:
        sched.coded_kernels()[0].set_opencl_options({'local_size': 'a'})
    assert "CodedKern opencl_option 'local_size' should be an integer." \
        in str(err)

    # queue_number must be an integer
    with pytest.raises(TypeError) as err:
        sched.coded_kernels()[0].set_opencl_options({'queue_number': 'a'})
    assert "CodedKern opencl_option 'queue_number' should be an integer." \
        in str(err)
예제 #30
0
def test_opencl_multi_invoke_options_validation(option_to_check):
    ''' Check that the OpenCL options constrains are enforced when there are
    multiple invokes.
    '''
    psy, _ = get_invoke("test12_two_invokes_two_kernels.f90", API, idx=0)
    invoke1_schedule = psy.invokes.invoke_list[0].schedule
    invoke2_schedule = psy.invokes.invoke_list[1].schedule
    # Currently, moving the boundaries inside the kernel is a prerequisite
    # for the GOcean gen_ocl() code generation.
    trans = GOMoveIterationBoundariesInsideKernelTrans()
    for kernel in invoke1_schedule.coded_kernels():
        trans.apply(kernel)
    for kernel in invoke2_schedule.coded_kernels():
        trans.apply(kernel)

    otrans = OCLTrans()
    otrans.apply(invoke1_schedule, options={option_to_check: False})
    otrans.apply(invoke2_schedule, options={option_to_check: True})
    with pytest.raises(NotImplementedError) as err:
        _ = str(psy.gen)
    assert ("The current implementation creates a single OpenCL context for "
            "all the invokes which needs certain OpenCL options to match "
            "between invokes. Found '{0}' with unmatching values between "
            "invokes.".format(option_to_check) in str(err.value))