def test_compile(npct, subprocess):
    """Compile a dummy C kernel and verify the compile pipeline and cleanup.

    ``npct`` and ``subprocess`` are expected to be mock objects (their
    ``mock_calls`` are inspected). The test checks that six subprocess calls
    were recorded, that the library loader was invoked, and that no
    ``.cu``/``.o``/``.so`` file remains for the loaded file name.
    """
    import os.path

    source_text = "this is a fake C program"
    instance = KernelInstance(
        "blabla",
        KernelSource(source_text, "C"),
        source_text,
        [],
        None,
        None,
        dict(),
        [],
    )

    compiled = CFunctions().compile(instance)

    print(subprocess.mock_calls)
    print(npct.mock_calls)
    print(compiled)

    assert len(subprocess.mock_calls) == 6
    assert npct.load_library.called == 1

    # first positional argument of the first load_library call is the file name
    load_args, _ = npct.load_library.call_args_list[0]
    filename = load_args[0]
    print('filename=' + filename)

    # check if temporary files are cleaned up correctly
    for suffix in (".cu", ".o", ".so"):
        assert not os.path.isfile(filename + suffix)
def test_complies_fortran_function_with_module():
    """Compile a Fortran function that lives inside a module and run it.

    The kernel defines ``my_test_function`` in ``my_fancy_module``; the test
    builds it with ``gfortran`` through ``CFunctions`` and expects the value
    returned by ``run_kernel`` to be close to 42.0.
    """
    kernel_string = """
    module my_fancy_module
    use iso_c_binding

    contains

    function my_test_function() result(time)
        use iso_c_binding
        real (c_float) :: time

        time = 42.0
    end function my_test_function

    end module my_fancy_module
    """
    kernel_name = "my_test_function"
    # The source is tagged "C" although it is Fortran; the Fortran toolchain
    # is selected through the compiler argument below.
    kernel_sources = KernelSource(kernel_string, "C")
    kernel_instance = KernelInstance(kernel_name, kernel_sources, kernel_string, [], None, None, dict(), [])

    cfunc = CFunctions(compiler="gfortran")
    func = cfunc.compile(kernel_instance)

    # no kernel arguments, empty thread/grid tuples
    result = cfunc.run_kernel(func, [], (), ())

    assert np.isclose(result, 42.0)
def test_ready_argument_list1():
    """Check that array arguments keep their dtype and values when prepared.

    Three arrays (float32, float64, int32) are passed through
    ``ready_argument_list``; each result is checked both through its ctypes
    handle and through its ``numpy`` attribute.
    """
    arguments = [
        numpy.array([1, 2, 3]).astype(numpy.float32),
        numpy.array([4, 5, 6]).astype(numpy.float64),
        numpy.array([7, 8, 9]).astype(numpy.int32),
    ]
    expected_dtypes = ['float32', 'float64', 'int32']

    output = CFunctions().ready_argument_list(arguments)
    print(output)

    # view of the data as seen through the ctypes handle
    for idx, host in enumerate(arguments):
        viewed = numpy.ctypeslib.as_array(output[idx].ctypes, shape=host.shape)
        assert viewed.dtype == expected_dtypes[idx]
        assert all(viewed == host)

    # the numpy side of each prepared argument
    for idx, host in enumerate(arguments):
        assert output[idx].numpy.dtype == expected_dtypes[idx]
        assert all(output[idx].numpy == host)
def test_ready_argument_list1():
    """Verify dtype and contents of prepared float32/float64/int32 arrays.

    Each prepared argument is inspected twice: once via
    ``numpy.ctypeslib.as_array`` on its ``ctypes`` attribute and once via its
    ``numpy`` attribute.
    """
    specs = (
        ([1, 2, 3], numpy.float32, 'float32'),
        ([4, 5, 6], numpy.float64, 'float64'),
        ([7, 8, 9], numpy.int32, 'int32'),
    )
    arguments = [numpy.array(values).astype(dtype) for values, dtype, _ in specs]

    cfunc = CFunctions()
    output = cfunc.ready_argument_list(arguments)
    print(output)

    for position, (_, _, dtype_name) in enumerate(specs):
        source = arguments[position]
        prepared = output[position]

        through_ctypes = numpy.ctypeslib.as_array(prepared.ctypes, shape=source.shape)
        assert through_ctypes.dtype == dtype_name
        assert all(through_ctypes == source)

        assert prepared.numpy.dtype == dtype_name
        assert all(prepared.numpy == source)
def test_ready_argument_list3():
    """Check that ``ready_argument_list`` raises when given a ``Mock`` argument.

    Any exception raised by the call counts as success. If the call returns
    normally the test fails.
    """
    arguments = [Mock()]
    cfunc = CFunctions()

    try:
        cfunc.ready_argument_list(arguments)
    except Exception:
        # the argument was rejected, which is what this test expects
        return

    # The failure is raised outside the try block on purpose: an
    # ``assert False`` inside it raises AssertionError, which is itself an
    # Exception and would be swallowed by the handler above, so the test
    # could never fail.
    raise AssertionError("ready_argument_list accepted a Mock argument without raising")
def test_ready_argument_list3():
    """Check that ``ready_argument_list`` raises when given a ``Mock`` argument.

    Any exception raised by the call counts as success. If the call returns
    normally the test fails.
    """
    arg1 = Mock()
    arguments = [arg1]
    cfunc = CFunctions()

    try:
        cfunc.ready_argument_list(arguments)
    except Exception:
        # expected: the Mock argument was rejected
        pass
    else:
        # Fail in the ``else`` branch rather than inside ``try``: an
        # AssertionError raised inside ``try`` is caught by
        # ``except Exception`` and would make the test pass unconditionally.
        raise AssertionError("ready_argument_list accepted a Mock argument without raising")
def test_memcpy_htod():
    """Check that ``memcpy_htod`` fills the destination argument with the source values.

    The destination starts as zeros; after the call its ``numpy`` array must
    equal the source data.
    """
    expected = [1, 2, 3, 4]
    host_src = numpy.array(expected).astype(numpy.float32)

    # zero-initialised destination, wrapped together with a float pointer to it
    dest = numpy.zeros_like(host_src)
    dest_ptr = dest.ctypes.data_as(C.POINTER(C.c_float))
    dest_arg = Argument(numpy=dest, ctypes=dest_ptr)

    CFunctions().memcpy_htod(dest_arg, host_src)

    assert all(dest_arg.numpy == expected)
def test_memcpy_htod():
    """Copy a float32 array into a zeroed ``Argument`` and compare the contents."""
    values = [1, 2, 3, 4]
    source = numpy.array(values).astype(numpy.float32)
    target = numpy.zeros_like(source)
    target_arg = Argument(
        numpy=target,
        ctypes=target.ctypes.data_as(C.POINTER(C.c_float)),
    )

    backend = CFunctions()
    backend.memcpy_htod(target_arg, source)

    # the destination argument must now hold the source values
    assert all(target_arg.numpy == values)
def test_ready_argument_list5():
    """Check that ``ready_argument_list`` stores a copy of an array argument.

    The prepared argument must equal the input at first, and must stop
    matching once the input array is modified afterwards.
    """
    host = numpy.array([1, 2, 3]).astype(numpy.float32)

    output = CFunctions().ready_argument_list([host])
    assert all(output[0].numpy == host)

    # test that a copy has been made
    host[0] += 1
    assert not all(output[0].numpy == host)
def test_memset():
    """Check that ``memset`` on a raw float pointer zeroes the underlying buffer."""
    values = [1, 2, 3, 4]
    buf = numpy.array(values).astype(numpy.float32)
    buf_ptr = buf.ctypes.data_as(C.POINTER(C.c_float))

    # write zero over all bytes of the buffer
    CFunctions().memset(buf_ptr, 0, buf.nbytes)

    output = numpy.ctypeslib.as_array(buf_ptr, shape=(4, ))
    print(output)
    assert all(output == numpy.zeros(4))
def test_ready_argument_list5():
    """Verify that the prepared argument is independent of the caller's array."""
    source = numpy.array([1, 2, 3]).astype(numpy.float32)
    arguments = [source]

    backend = CFunctions()
    prepared = backend.ready_argument_list(arguments)

    # identical right after preparation
    assert all(prepared[0].numpy == source)

    # test that a copy has been made
    source[0] = source[0] + 1
    assert not all(prepared[0].numpy == source)
def test_memcpy_dtoh():
    """Check that ``memcpy_dtoh`` copies data behind a pointer into a host array.

    The pointer is registered in ``cfunc.arg_mapping`` under its string form,
    together with the dtype name and shape of the data it points to.
    """
    values = [1, 2, 3, 4]
    device_side = numpy.array(values).astype(numpy.float32)
    device_ptr = device_side.ctypes.data_as(C.POINTER(C.c_float))
    output = numpy.zeros_like(device_side)

    cfunc = CFunctions()
    # describe the pointed-to data so the copy knows its dtype and shape
    cfunc.arg_mapping = {str(device_ptr): Argument(str(device_side.dtype), (4, ))}
    cfunc.memcpy_dtoh(output, device_ptr)

    print(values)
    print(output)
    assert all(output == values)
def test_memset():
    """Check that ``memset`` on an ``Argument`` zeroes the wrapped buffer.

    The result is checked through both the ctypes pointer and the numpy array.
    """
    values = [1, 2, 3, 4]
    buf = numpy.array(values).astype(numpy.float32)
    buf_ptr = buf.ctypes.data_as(C.POINTER(C.c_float))
    wrapped = Argument(numpy=buf, ctypes=buf_ptr)

    CFunctions().memset(wrapped, 0, buf.nbytes)

    output = numpy.ctypeslib.as_array(buf_ptr, shape=(4,))
    print(output)

    zeros = numpy.zeros(4)
    assert all(output == zeros)
    assert all(buf == zeros)
def test_memset():
    """Zero a float32 buffer through ``memset`` and verify pointer view and array."""
    initial = [1, 2, 3, 4]
    data = np.array(initial).astype(np.float32)
    data_ptr = data.ctypes.data_as(C.POINTER(C.c_float))
    arg = Argument(numpy=data, ctypes=data_ptr)

    backend = CFunctions()
    backend.memset(arg, 0, data.nbytes)

    # view the same memory through the ctypes pointer
    output = np.ctypeslib.as_array(data_ptr, shape=(4,))
    print(output)

    for observed in (output, data):
        assert all(observed == np.zeros(4))
def test_memcpy_dtoh():
    """Check that ``memcpy_dtoh`` copies an ``Argument``'s data into a host array.

    After the copy, both the destination and the source array must hold the
    original values.
    """
    values = [1, 2, 3, 4]
    source = numpy.array(values).astype(numpy.float32)
    source_arg = Argument(
        numpy=source,
        ctypes=source.ctypes.data_as(C.POINTER(C.c_float)),
    )
    output = numpy.zeros_like(source)

    CFunctions().memcpy_dtoh(output, source_arg)

    print(values)
    print(output)

    assert all(output == values)
    assert all(source == values)
def test_memcpy_dtoh():
    """Copy from an ``Argument`` to a zeroed host array and compare both sides."""
    expected = [1, 2, 3, 4]
    held = numpy.array(expected).astype(numpy.float32)
    held_ptr = held.ctypes.data_as(C.POINTER(C.c_float))
    arg = Argument(numpy=held, ctypes=held_ptr)
    output = numpy.zeros_like(held)

    backend = CFunctions()
    backend.memcpy_dtoh(output, arg)

    print(expected)
    print(output)

    # destination received the data, source still holds it
    for observed in (output, held):
        assert all(observed == expected)
def __init__(self, kernel_source, device=0, platform=0, quiet=False, compiler=None,
             compiler_options=None, iterations=7, observers=None):
    """ Instantiate the DeviceInterface, based on language in kernel source

    :param kernel_source: The kernel sources; its ``lang`` attribute selects
        the backend ("CUDA", "OpenCL", or "C").
    :type kernel_source: kernel_tuner.core.KernelSource

    :param device: CUDA/OpenCL device to use, in case you have multiple
        CUDA-capable GPUs or OpenCL devices you may use this to select one,
        0 by default. Not passed to the backend when the language is "C".
    :type device: int

    :param platform: OpenCL platform to use, in case you have multiple
        OpenCL platforms you may use this to select one,
        0 by default. Only passed to the OpenCL backend.
    :type platform: int

    :param quiet: When true, the name of the selected device is not printed.
    :type quiet: bool

    :param compiler: Compiler handed to the C backend; the CUDA and OpenCL
        backends do not receive it.

    :param compiler_options: The compiler options to use when compiling kernels for this device.
    :type compiler_options: list of strings

    :param iterations: Number of iterations to be used when benchmarking using this device.
    :type iterations: int

    :param observers: Observers handed to the CUDA and OpenCL backends. If one
        of them is an NVMLObserver, its ``nvml`` attribute is stored on this
        object and ``use_nvml`` is set to True.

    :raises Exception: if the kernel language is not CUDA, OpenCL, or C.
    """
    lang = kernel_source.lang
    logging.debug('DeviceInterface instantiated, lang=%s', lang)

    # keyword options accepted by every backend
    shared_opts = dict(compiler_options=compiler_options, iterations=iterations)

    if lang == "CUDA":
        dev = CudaFunctions(device, observers=observers, **shared_opts)
    elif lang == "OpenCL":
        dev = OpenCLFunctions(device, platform, observers=observers, **shared_opts)
    elif lang == "C":
        dev = CFunctions(compiler=compiler, **shared_opts)
    else:
        raise Exception("Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet")

    #look for NVMLObserver in observers, if present, enable special tunable parameters through nvml
    self.use_nvml = False
    for obs in observers or []:
        if isinstance(obs, NVMLObserver):
            self.nvml = obs.nvml
            self.use_nvml = True

    self.lang = lang
    self.dev = dev
    self.units = dev.units
    self.name = dev.name

    if not quiet:
        print("Using: " + self.dev.name)

    # enter the backend's context manager right away; no matching __exit__
    # call is made in this method
    dev.__enter__()
def __init__(self, original_kernel, device=0, platform=0, lang=None, quiet=False,
             compiler_options=None, iterations=7):
    """ Instantiate the DeviceInterface, based on language in kernel source

    :param original_kernel: The source of the kernel as passed to tune_kernel
    :type original_kernel: kernel source as a string or a list of strings denoting filenames

    :param device: CUDA/OpenCL device to use, in case you have multiple
        CUDA-capable GPUs or OpenCL devices you may use this to select one,
        0 by default. Ignored if you are tuning host code by passing lang="C".
    :type device: int

    :param platform: OpenCL platform to use, in case you have multiple
        OpenCL platforms you may use this to select one,
        0 by default. Ignored if not using OpenCL.
    :type platform: int

    :param lang: Specifies the language used for GPU kernels.
        The kernel_tuner automatically detects the language, but if it fails,
        you may specify the language using this argument, currently supported: "CUDA", "OpenCL", or "C"
    :type lang: string

    :param quiet: When true, the name of the selected device is not printed.
    :type quiet: bool

    :param compiler_options: The compiler options to use when compiling kernels for this device.
    :type compiler_options: list of strings

    :param iterations: Number of iterations to be used when benchmarking using this device.
    :type iterations: int

    :raises Exception: if the resolved language is not CUDA, OpenCL, or C.
    """
    # Resolve the language first so the debug log reports the language that
    # is actually used, not the raw argument (which defaults to None).
    lang = util.detect_language(lang, original_kernel)
    logging.debug('DeviceInterface instantiated, lang=%s', lang)

    if lang == "CUDA":
        dev = CudaFunctions(device, compiler_options=compiler_options, iterations=iterations)
    elif lang == "OpenCL":
        dev = OpenCLFunctions(device, platform, compiler_options=compiler_options, iterations=iterations)
    elif lang == "C":
        dev = CFunctions(compiler_options=compiler_options, iterations=iterations)
    else:
        raise Exception(
            "Sorry, support for languages other than CUDA, OpenCL, or C is not implemented yet"
        )

    self.lang = lang
    self.dev = dev
    self.name = dev.name

    if not quiet:
        print("Using: " + self.dev.name)
def test_byte_array_arguments():
    """Check that an int8 array survives preparation and a copy back to the host."""
    host = numpy.array([1, 2, 3]).astype(numpy.int8)

    cfunc = CFunctions()
    output = cfunc.ready_argument_list([host])
    prepared = output[0]

    # inspect the prepared data through its ctypes handle
    viewed = numpy.ctypeslib.as_array(prepared.ctypes, shape=host.shape)
    assert viewed.dtype == 'int8'
    assert all(viewed == host)

    # copy back into a fresh array and compare again
    dest = numpy.zeros_like(host)
    cfunc.memcpy_dtoh(dest, prepared)
    assert all(dest == host)
def test_byte_array_arguments():
    """Check that an int8 array survives preparation and a copy back to the host.

    In this variant the prepared argument itself is passed to
    ``numpy.ctypeslib.as_array``.
    """
    host = numpy.array([1, 2, 3]).astype(numpy.int8)

    cfunc = CFunctions()
    output = cfunc.ready_argument_list([host])
    prepared = output[0]

    viewed = numpy.ctypeslib.as_array(prepared, shape=host.shape)
    assert viewed.dtype == 'int8'
    assert all(viewed == host)

    # copy back into a fresh array and compare again
    dest = numpy.zeros_like(host)
    cfunc.memcpy_dtoh(dest, prepared)
    assert all(dest == host)
def test_ready_argument_list2():
    """Check preparation of one array plus two numpy scalars.

    The array must keep dtype and values; the int32 and float32 scalars must
    be exposed as ``c_int32`` and ``c_float`` holding the same values.
    """
    array_arg = numpy.array([1, 2, 3]).astype(numpy.float32)
    int_scalar = numpy.int32(7)
    float_scalar = numpy.float32(6.0)

    output = CFunctions().ready_argument_list([array_arg, int_scalar, float_scalar])
    print(output)

    array_view = numpy.ctypeslib.as_array(output[0].ctypes, shape=array_arg.shape)
    assert array_view.dtype == 'float32'
    assert all(array_view == array_arg)

    scalar_checks = (
        (1, C.c_int32, int_scalar),
        (2, C.c_float, float_scalar),
    )
    for position, ctype, expected in scalar_checks:
        assert isinstance(output[position].ctypes, ctype)
        # element 1 of the prepared argument is read positionally here, as in
        # the original assertions
        assert output[position][1].value == expected
def test_ready_argument_list2():
    """Check preparation of one array plus two numpy scalars.

    In this variant the prepared arguments are the ctypes objects themselves:
    the scalars must be ``c_int32`` and ``c_float`` with matching values.
    """
    array_arg = numpy.array([1, 2, 3]).astype(numpy.float32)
    int_scalar = numpy.int32(7)
    float_scalar = numpy.float32(6.0)

    output = CFunctions().ready_argument_list([array_arg, int_scalar, float_scalar])
    print(output)

    array_view = numpy.ctypeslib.as_array(output[0], shape=array_arg.shape)
    assert array_view.dtype == 'float32'
    assert all(array_view == array_arg)

    scalar_checks = (
        (output[1], C.c_int32, int_scalar),
        (output[2], C.c_float, float_scalar),
    )
    for prepared, ctype, expected in scalar_checks:
        assert isinstance(prepared, ctype)
        assert prepared.value == expected
def test_compile_detects_device_code(npct, subprocess):
    """Check that source containing ``__global__`` is compiled from a ``.cu`` file.

    ``subprocess`` is expected to be a mock; its recorded ``check_call``
    invocations are scanned for an ``nvcc -c`` command, whose source file
    argument must end in ``.cu``. At least one such command must be present.
    """
    kernel_string = "this code clearly contains device code __global__ kernel(float* arg){ return; }"
    instance = KernelInstance(
        "blabla",
        KernelSource(kernel_string, "C"),
        kernel_string,
        [],
        None,
        None,
        dict(),
        [],
    )

    CFunctions().compile(instance)

    recorded_calls = subprocess.check_call.call_args_list
    print(recorded_calls)

    # assert the filename suffix used for source compilation is .cu
    dot_cu_used = False
    for positional, _keywords in recorded_calls:
        command = positional[0]
        print(command)
        if command[0] == 'nvcc' and command[1] == '-c':
            assert command[2][-3:] == '.cu'
            dot_cu_used = True

    assert dot_cu_used
def test_compile(npct, subprocess):
    """Compile a dummy C kernel by name and source string and verify cleanup.

    ``npct`` and ``subprocess`` are expected to be mock objects (their
    ``mock_calls`` are inspected). The test checks that six subprocess calls
    were recorded, that the library loader was invoked, and that no
    ``.cu``/``.o``/``.so`` file remains for the loaded file name.
    """
    import os.path

    compiled = CFunctions().compile("blabla", "this is a fake C program")

    print(subprocess.mock_calls)
    print(npct.mock_calls)
    print(compiled)

    assert len(subprocess.mock_calls) == 6
    assert npct.load_library.called == 1

    # first positional argument of the first load_library call is the file name
    load_args, _ = npct.load_library.call_args_list[0]
    filename = load_args[0]
    print('filename=' + filename)

    #check if temporary files are cleaned up correctly
    for suffix in (".cu", ".o", ".so"):
        assert not os.path.isfile(filename + suffix)
def test_ready_argument_list4():
    """Check that ``ready_argument_list`` raises TypeError for a plain Python int."""
    arg1 = 9
    cfunc = CFunctions()

    # Only the call under test sits inside ``raises``: a TypeError raised by
    # the setup above must fail the test, not satisfy it.
    with raises(TypeError):
        cfunc.ready_argument_list([arg1])
def test_ready_argument_list4():
    """Check that a plain Python int argument is rejected with TypeError."""
    # Setup stays outside the ``raises`` block so that a TypeError from
    # constructing the backend cannot make the test pass by accident.
    plain_int = 9
    backend = CFunctions()

    with raises(TypeError):
        backend.ready_argument_list([plain_int])
def test_ready_argument_list4():
    """Call ``ready_argument_list`` with a plain Python int.

    The body makes no assertion of its own.
    NOTE(review): presumably the expected outcome (e.g. an exception) is
    checked by a decorator or harness outside this definition - confirm.
    """
    plain_int = int(9)
    CFunctions().ready_argument_list([plain_int])