def env():
    """Create a device interface and a kernel instance for a CUDA vector_add kernel.

    The kernel is instantiated with ``block_size_x=128`` and the arguments
    returned by ``get_vector_add_args()``; the last of those arguments is
    used as the problem size.

    :returns: the ``core.DeviceInterface`` and the kernel instance it created
    :rtype: tuple
    """
    kernel_string = """__global__ void vector_add(float *c, float *a, float *b, int n) { int i = blockIdx.x * block_size_x + threadIdx.x; if (i<n) { c[i] = a[i] + b[i]; } } """
    args = get_vector_add_args()
    lang = "CUDA"
    verbose = True
    params = {"block_size_x": 128}

    kernel_source = core.KernelSource(kernel_string, lang)

    # everything that describes the kernel itself
    kernel_settings = {
        "kernel_name": "vector_add",
        "kernel_string": kernel_string,
        "problem_size": args[-1],
        "arguments": args,
        "lang": lang,
        "grid_div_x": None,
        "grid_div_y": None,
        "grid_div_z": None,
        "cmem_args": None,
        "texmem_args": None,
        "block_size_names": None,
    }
    kernel_options = Options(**kernel_settings)

    # everything that describes the device the kernel runs on
    device_settings = {
        "device": 0,
        "platform": 0,
        "lang": lang,
        "quiet": False,
        "compiler": None,
        "compiler_options": None,
    }
    device_options = Options(**device_settings)

    dev = core.DeviceInterface(kernel_source, iterations=7, **device_options)
    instance = dev.create_kernel_instance(kernel_source, kernel_options, params, verbose)

    return dev, instance
def test_setup_method_options():
    """Check the options dict that setup_method_options builds for L-BFGS-B."""
    options = Options(
        eps=1e-5,
        tune_params=tune_params,
        strategy_options={},
        verbose=True,
    )

    result = minimize.setup_method_options("L-BFGS-B", options)

    assert isinstance(result, dict)
    # numeric settings expected in the returned dict
    for key, expected in (("eps", 1e-5), ("maxfun", 100)):
        assert result[key] == expected
    assert result["disp"] is True
def test__cost_func():
    """Exercise minimize._cost_func for a fresh run, a cached run and a restricted run.

    Uses the module-level ``tune_params`` and the ``fake_runner`` helper.
    """
    point = [1, 4]
    no_kernel_options = None

    unrestricted = Options(
        scaling=False,
        snap=False,
        tune_params=tune_params,
        restrictions=None,
        strategy_options={},
        cache={},
    )
    runner = fake_runner()
    collected = []

    # first evaluation goes through the runner
    measured = minimize._cost_func(point, no_kernel_options, unrestricted, runner, collected)
    assert measured == 5

    # second evaluation of the same point, now with an entry in the cache
    unrestricted.cache["1,4"] = OrderedDict([("x", 1), ("y", 4), ("time", 5)])
    measured = minimize._cost_func(point, no_kernel_options, unrestricted, runner, collected)
    assert measured == 5

    # check if 1st run is properly cached and runner is only called once
    assert runner.run.call_count == 1

    # check if restrictions are properly handled
    always_rejecting = ["False"]
    restricted = Options(
        scaling=False,
        snap=False,
        tune_params=tune_params,
        restrictions=always_rejecting,
        strategy_options={},
        verbose=True,
        cache={},
    )
    measured = minimize._cost_func(point, no_kernel_options, restricted, runner, collected)
    assert measured == 1e20
def __init__(self, kernel_name, kernel_string, problem_size, arguments, params, inputs=None, outputs=None, device=0, platform=0,
             block_size_names=None, grid_div_x=None, grid_div_y=None, grid_div_z=None, verbose=True, lang=None):
    """ Construct Python helper object to compile and call the kernel from Python

        This object compiles a GPU kernel parameterized using the parameters in params.
        GPU memory is allocated for each argument using its size and type as listed in arguments.
        The object can be called directly as a function with the kernel arguments as function arguments.
        Kernel arguments marked as inputs will be copied to the GPU on every kernel launch.
        Only the kernel arguments marked as outputs will be returned, note that the result is always
        returned in a list, even when there is only one output.

        Most of the arguments to this function are the same as with tune_kernel or run_kernel in Kernel Tuner,
        and are therefore not duplicated here. The two new arguments are:

        :param inputs: a boolean list with one entry per element of arguments to signal whether
            an argument is input to the kernel. When omitted (or empty), every argument is
            treated as an input.
        :type inputs: list(bool)

        :param outputs: a boolean list with one entry per element of arguments to signal whether
            an argument is output of the kernel. When omitted (or empty), every argument is
            treated as an output.
        :type outputs: list(bool)

    """
    # construct device interface; platform is forwarded together with device so
    # that a non-default platform requested by the caller is not silently ignored
    kernel_source = core.KernelSource(kernel_string, lang)
    self.dev = core.DeviceInterface(kernel_source, device=device, platform=platform)

    # construct kernel_options to hold information about the kernel:
    # snapshot the local names here and keep those that _kernel_options knows about
    opts = locals()
    kernel_options = Options([(key, opts[key]) for key in _kernel_options if key in opts])

    # instantiate the kernel given the parameters in params
    self.kernel_instance = self.dev.create_kernel_instance(kernel_source, kernel_options, params, verbose)

    # compile the kernel
    self.func = self.dev.compile_kernel(self.kernel_instance, verbose)

    # setup GPU memory
    self.gpu_args = self.dev.ready_argument_list(arguments)

    # without an explicit (non-empty) selection every argument counts as input and as output
    self.inputs = inputs if inputs else [True for _ in arguments]
    self.outputs = outputs if outputs else [True for _ in arguments]
def test_get_bounds_x0_eps():
    """Check get_bounds_x0_eps for one five-valued parameter, with and without scaling."""
    options = Options()
    options["scaling"] = True
    search_space = OrderedDict([("x", [0, 1, 2, 3, 4])])
    options["tune_params"] = search_space

    # scaling enabled
    scaled = minimize.get_bounds_x0_eps(options)
    scaled_bounds, scaled_x0, scaled_eps = scaled
    assert scaled_bounds == [(0.0, 1.0)]
    assert scaled_x0 == [0.5]
    assert scaled_eps == 0.2

    # scaling disabled
    options["scaling"] = False
    plain = minimize.get_bounds_x0_eps(options)
    plain_bounds, plain_x0, plain_eps = plain
    assert plain_bounds == [(0, 4)]
    assert plain_x0 == [2.0]
    assert plain_eps == 1.0
def test_process_cache():
    """Check that process_cache creates, reloads and validates a cache file.

    Three situations are covered: no pre-existing cache file, a pre-existing
    cache file that matches the current kernel, device and tunable parameters,
    and a cache file that does not match one of those three (each mismatch
    must raise ValueError with a message naming what differs).
    """

    def assert_open_cachefile_is_correctly_parsed(cache):
        """Read a cache file that is still open, append the closing braces and check its header."""
        with open(cache, "r") as cachefile:
            filestr = cachefile.read()
        # drop a trailing comma so that appending the closing braces yields valid JSON
        if filestr.endswith(","):
            filestr = filestr[:-1]
        file_contents = filestr + "}\n}"
        cache_object = json.loads(file_contents)
        assert cache_object["device_name"] == "test_device"
        assert cache_object["kernel_name"] == "test_kernel"

    # get temp filename, but remove the file
    cache = get_temp_filename(suffix=".json")
    delete_temp_file(cache)

    kernel_options = Options(kernel_name="test_kernel")
    tuning_options = Options(cache=cache, tune_params=Options(x=[1, 2, 3, 4]))
    runner = Options(dev=Options(name="test_device"))

    try:
        # call process_cache without pre-existing cache
        process_cache(cache, kernel_options, tuning_options, runner)

        # check if file has been created
        assert os.path.isfile(cache)
        assert_open_cachefile_is_correctly_parsed(cache)
        assert tuning_options.cachefile == cache
        assert isinstance(tuning_options.cache, dict)
        assert len(tuning_options.cache) == 0

        # store one entry in the cache
        params = {"x": 4, "time": np.float32(0.1234)}
        store_cache("4", params, tuning_options)
        assert len(tuning_options.cache) == 1

        # close the cache
        close_cache(cache)

        # now test process cache with a pre-existing cache file
        process_cache(cache, kernel_options, tuning_options, runner)
        assert_open_cachefile_is_correctly_parsed(cache)

        assert tuning_options.cache["4"]["time"] == params["time"]

        # check that exceptions are raised when using a cache file for
        # a different kernel, device, or parameter set.
        # The message checks must come after each `with` block: statements that
        # follow the raising call inside the block are never executed. Each
        # mismatch is undone before the next check so that every check
        # triggers exactly one kind of mismatch.
        kernel_options.kernel_name = "wrong_kernel"
        with pytest.raises(ValueError) as excep:
            process_cache(cache, kernel_options, tuning_options, runner)
        assert "kernel" in str(excep.value)
        kernel_options.kernel_name = "test_kernel"

        runner.dev.name = "wrong_device"
        with pytest.raises(ValueError) as excep:
            process_cache(cache, kernel_options, tuning_options, runner)
        assert "device" in str(excep.value)
        runner.dev.name = "test_device"

        tuning_options.tune_params["y"] = ["a", "b"]
        with pytest.raises(ValueError) as excep:
            process_cache(cache, kernel_options, tuning_options, runner)
        assert "parameter" in str(excep.value)

    finally:
        delete_temp_file(cache)
from collections import OrderedDict

from kernel_tuner.strategies import genetic_algorithm as ga
from kernel_tuner.interface import Options

# search space and options shared by the tests below
tune_params = OrderedDict([("x", [1, 2, 3]), ("y", [4, 5, 6])])
tuning_options = Options({"restrictions": [], "tune_params": tune_params})
max_threads = 1024


def test_weighted_choice():
    """Draw one and then two members with weighted_choice and check they come from the population."""
    population = ga.random_population(5, tune_params, tuning_options, max_threads)
    # pair every member with its position in the population as its weight
    weighted_pop = [[member, position] for position, member in enumerate(population)]

    single_draw = ga.weighted_choice(weighted_pop, 1)
    assert single_draw[0] in population

    double_draw = ga.weighted_choice(weighted_pop, 2)
    print(double_draw)
    assert double_draw[0] in population
    assert double_draw[1] in population
    # the two drawn members must differ
    assert double_draw[0] != double_draw[1]


def test_random_population():
    """Call random_population for a population of eight; the result is not inspected."""
    requested_size = 8
    pop = ga.random_population(requested_size, tune_params, tuning_options, max_threads)
import itertools
from random import uniform as randfloat
import numpy as np
from collections import OrderedDict, namedtuple
from kernel_tuner.interface import Options
from kernel_tuner.strategies import minimize
from kernel_tuner.strategies import bayes_opt
from kernel_tuner.strategies.bayes_opt import BayesianOptimization

# search space shared by this module: two three-valued parameters and one fixed parameter
tune_params = OrderedDict()
tune_params["x"] = [1, 2, 3]
tune_params["y"] = [4, 5, 6]
tune_params["z"] = [7]

strategy_options = dict(popsize=0, max_fevals=10)
tuning_options = Options(dict(restrictions=[], tune_params=tune_params, strategy_options=strategy_options))
tuning_options["scaling"] = True
tuning_options["snap"] = True
max_threads = 1024

# initialize required data
# (itertools must be imported above: the full parameter space is the cartesian
# product of all parameter value lists)
parameter_space = list(itertools.product(*tune_params.values()))
_, _, eps = minimize.get_bounds_x0_eps(tuning_options)
original_to_normalized, normalized_to_original = bayes_opt.generate_normalized_param_dicts(tune_params, eps)
normalized_parameter_space = bayes_opt.normalize_parameter_space(parameter_space, tune_params, original_to_normalized)
pruned_parameter_space, removed_tune_params = bayes_opt.prune_parameter_space(
    normalized_parameter_space, tuning_options, tune_params, original_to_normalized)