Beispiel #1
0
def env():
    """Build a device interface and a kernel instance for a CUDA vector add.

    The kernel source leaves ``block_size_x`` undefined; it is supplied
    through the parameter dictionary (``block_size_x = 128``) when the kernel
    instance is created.

    :returns: the ``core.DeviceInterface`` and the kernel instance created
        from it, in that order
    :rtype: tuple
    """
    cuda_source = """__global__ void vector_add(float *c, float *a, float *b, int n) {
            int i = blockIdx.x * block_size_x + threadIdx.x;
            if (i<n) {
                c[i] = a[i] + b[i];
            }
        } """
    vector_args = get_vector_add_args()
    block_params = {"block_size_x": 128}

    language = "CUDA"
    source = core.KernelSource(cuda_source, language)

    # the last entry of the argument list is used as the problem size
    kernel_opts = Options(
        kernel_name="vector_add",
        kernel_string=cuda_source,
        problem_size=vector_args[-1],
        arguments=vector_args,
        lang=language,
        grid_div_x=None,
        grid_div_y=None,
        grid_div_z=None,
        cmem_args=None,
        texmem_args=None,
        block_size_names=None,
    )
    device_opts = Options(
        device=0,
        platform=0,
        lang=language,
        quiet=False,
        compiler=None,
        compiler_options=None,
    )

    device_interface = core.DeviceInterface(source, iterations=7, **device_opts)
    # final positional argument is the verbose flag
    kernel_instance = device_interface.create_kernel_instance(
        source, kernel_opts, block_params, True)

    return device_interface, kernel_instance
Beispiel #2
0
def test_setup_method_options():
    """Check the option dictionary built for the L-BFGS-B method.

    Expects a plain ``dict`` carrying the requested ``eps``, a ``maxfun`` of
    100 and ``disp`` enabled because ``verbose`` is set.
    """
    options = Options(
        eps=1e-5,
        tune_params=tune_params,
        strategy_options={},
        verbose=True,
    )

    result = minimize.setup_method_options("L-BFGS-B", options)

    assert isinstance(result, dict)
    assert result["eps"] == 1e-5
    assert result["maxfun"] == 100
    assert result["disp"] is True
Beispiel #3
0
def test__cost_func():
    """Exercise ``minimize._cost_func`` for a plain run, a cache hit and a restriction.

    Three scenarios are covered in order:

    1. a first evaluation of the configuration ``[1, 4]``, expected to cost 5;
    2. a second evaluation after an entry for ``"1,4"`` has been placed in the
       cache, which must not trigger another call to the runner;
    3. an evaluation with a restriction that always evaluates to false,
       expected to cost ``1e20``.
    """
    config = [1, 4]
    no_kernel_options = None
    fake = fake_runner()
    collected = []

    options = Options(
        scaling=False,
        snap=False,
        tune_params=tune_params,
        restrictions=None,
        strategy_options={},
        cache={},
    )

    cost = minimize._cost_func(config, no_kernel_options, options, fake, collected)
    assert cost == 5

    # insert the cache entry by hand, then evaluate the same configuration again
    options.cache["1,4"] = OrderedDict([("x", 1), ("y", 4), ("time", 5)])
    cost = minimize._cost_func(config, no_kernel_options, options, fake, collected)
    assert cost == 5

    # the second evaluation must have been served from the cache
    assert fake.run.call_count == 1

    # a restriction that is always false must reject the configuration
    rejecting_options = Options(
        scaling=False,
        snap=False,
        tune_params=tune_params,
        restrictions=["False"],
        strategy_options={},
        verbose=True,
        cache={},
    )
    cost = minimize._cost_func(config, no_kernel_options, rejecting_options, fake,
                               collected)
    assert cost == 1e20
Beispiel #4
0
    def __init__(self, kernel_name, kernel_string, problem_size, arguments, params, inputs=None, outputs=None, device=0, platform=0,
                 block_size_names=None, grid_div_x=None, grid_div_y=None, grid_div_z=None, verbose=True, lang=None):
        """ Construct Python helper object to compile and call the kernel from Python

            This object compiles a GPU kernel parameterized using the parameters in params.
            GPU memory is allocated for each argument using its size and type as listed in arguments.
            The object can be called directly as a function with the kernel arguments as function arguments.
            Kernel arguments marked as inputs will be copied to the GPU on every kernel launch.
            Only the kernel arguments marked as outputs will be returned, note that the result is always
            returned in a list, even when there is only one output.

            Most of the arguments to this function are the same as with tune_kernel or run_kernel in Kernel Tuner,
            and are therefore not duplicated here. Both device and platform are forwarded to the
            device interface. The two new arguments are:

            :param inputs: a boolean list of length arguments to signal whether an argument is input to the kernel.
                When omitted (or empty) every argument is treated as an input.
            :type inputs: list(bool)

            :param outputs: a boolean list of length arguments to signal whether an argument is output of the kernel.
                When omitted (or empty) every argument is treated as an output.
            :type outputs: list(bool)

        """
        #construct device interface
        kernel_source = core.KernelSource(kernel_string, lang)
        #forward platform as well as device, otherwise a non-default platform is silently ignored
        self.dev = core.DeviceInterface(kernel_source, device=device, platform=platform)

        #construct kernel_options to hold information about the kernel
        #locals() is captured here so that every constructor argument named in _kernel_options is picked up
        opts = locals()
        kernel_options = Options([(k, opts[k]) for k in _kernel_options.keys() if k in opts])

        #instantiate the kernel given the parameters in params
        self.kernel_instance = self.dev.create_kernel_instance(kernel_source, kernel_options, params, verbose)

        #compile the kernel
        self.func = self.dev.compile_kernel(self.kernel_instance, verbose)

        #setup GPU memory
        self.gpu_args = self.dev.ready_argument_list(arguments)

        #default to treating every argument as both input and output
        self.inputs = inputs if inputs else [True for _ in arguments]
        self.outputs = outputs if outputs else [True for _ in arguments]
Beispiel #5
0
def test_get_bounds_x0_eps():
    """Check bounds, starting point and step size with and without scaling.

    A single tunable parameter ``x`` with the values 0..4 is used. The same
    options object is evaluated twice, first with ``scaling`` enabled and then
    with it disabled.
    """
    options = Options()
    options["scaling"] = True
    options["tune_params"] = OrderedDict([("x", [0, 1, 2, 3, 4])])

    # (scaling, expected bounds, expected x0, expected eps)
    expectations = (
        (True, [(0.0, 1.0)], [0.5], 0.2),
        (False, [(0, 4)], [2.0], 1.0),
    )

    for scaling, expected_bounds, expected_x0, expected_eps in expectations:
        options["scaling"] = scaling

        bounds, x0, eps = minimize.get_bounds_x0_eps(options)

        assert bounds == expected_bounds
        assert x0 == expected_x0
        assert eps == expected_eps
Beispiel #6
0
def test_process_cache():
    """Exercise ``process_cache`` on a new cache file and on a pre-existing one.

    The test covers, in order:

    1. creating a cache file that does not exist yet;
    2. storing one entry, closing the cache and re-opening it;
    3. the ``ValueError`` raised when the cache file belongs to a different
       kernel, a different device, or a different set of tunable parameters.

    Each mismatch in step 3 is checked in isolation: the kernel and device
    names are restored after their check so that a later check cannot pass
    because of an earlier mismatch. The temporary file is always removed.
    """

    def assert_open_cachefile_is_correctly_parsed(cache):
        # An open cache file is not valid JSON yet: drop a trailing comma and
        # append the closing braces before parsing it.
        with open(cache, "r") as cachefile:
            filestr = cachefile.read()
        if filestr[-1] == ",":
            filestr = filestr[:-1]
        file_contents = filestr + "}\n}"
        cache_object = json.loads(file_contents)
        assert cache_object["device_name"] == "test_device"
        assert cache_object["kernel_name"] == "test_kernel"

    # get temp filename, but remove the file
    cache = get_temp_filename(suffix=".json")
    delete_temp_file(cache)

    kernel_options = Options(kernel_name="test_kernel")
    tuning_options = Options(cache=cache, tune_params=Options(x=[1, 2, 3, 4]))
    runner = Options(dev=Options(name="test_device"))

    try:
        # call process_cache without pre-existing cache
        process_cache(cache, kernel_options, tuning_options, runner)

        # check if file has been created
        assert os.path.isfile(cache)
        assert_open_cachefile_is_correctly_parsed(cache)
        assert tuning_options.cachefile == cache
        assert isinstance(tuning_options.cache, dict)
        assert len(tuning_options.cache) == 0

        # store one entry in the cache
        params = {"x": 4, "time": np.float32(0.1234)}
        store_cache("4", params, tuning_options)
        assert len(tuning_options.cache) == 1

        # close the cache
        close_cache(cache)

        # now test process cache with a pre-existing cache file
        process_cache(cache, kernel_options, tuning_options, runner)
        assert_open_cachefile_is_correctly_parsed(cache)

        assert tuning_options.cache["4"]["time"] == params["time"]

        # check that exceptions are raised when using a cache file for
        # a different kernel, device, or parameter set; the message checks
        # live outside the ``with`` blocks because nothing after the raising
        # call inside such a block is executed
        kernel_options.kernel_name = "wrong_kernel"
        with pytest.raises(ValueError) as excep:
            process_cache(cache, kernel_options, tuning_options, runner)
        assert "kernel" in str(excep.value)

        # restore the kernel name so the next check only sees a device mismatch
        kernel_options.kernel_name = "test_kernel"

        runner.dev.name = "wrong_device"
        with pytest.raises(ValueError) as excep:
            process_cache(cache, kernel_options, tuning_options, runner)
        assert "device" in str(excep.value)

        # restore the device name so the next check only sees a parameter mismatch
        runner.dev.name = "test_device"

        tuning_options.tune_params["y"] = ["a", "b"]
        with pytest.raises(ValueError) as excep:
            process_cache(cache, kernel_options, tuning_options, runner)
        assert "parameter" in str(excep.value)

    finally:
        delete_temp_file(cache)
from collections import OrderedDict
from kernel_tuner.strategies import genetic_algorithm as ga
from kernel_tuner.interface import Options

# tunable parameters shared by the genetic algorithm tests below
tune_params = OrderedDict([
    ("x", [1, 2, 3]),
    ("y", [4, 5, 6]),
])

# tuning options without any restrictions
tuning_options = Options({"restrictions": [], "tune_params": tune_params})
max_threads = 1024


def test_weighted_choice():
    """Check that ``ga.weighted_choice`` draws distinct members of the population.

    A random population of five is generated and each member is paired with
    its index as weight. Drawing one and drawing two members must only yield
    members of the population, and the two drawn together must differ.
    """
    population = ga.random_population(5, tune_params, tuning_options, max_threads)
    weighted_population = [[member, weight] for weight, member in enumerate(population)]

    single_draw = ga.weighted_choice(weighted_population, 1)
    assert single_draw[0] in population

    double_draw = ga.weighted_choice(weighted_population, 2)
    print(double_draw)
    assert double_draw[0] in population
    assert double_draw[1] in population
    assert double_draw[0] != double_draw[1]


def test_random_population():
    """Generate a random population of eight; only checks that the call succeeds."""
    requested_size = 8
    population = ga.random_population(
        requested_size, tune_params, tuning_options, max_threads)
import itertools
from collections import OrderedDict, namedtuple
from random import uniform as randfloat

import numpy as np

from kernel_tuner.interface import Options
from kernel_tuner.strategies import bayes_opt
from kernel_tuner.strategies import minimize
from kernel_tuner.strategies.bayes_opt import BayesianOptimization

# tunable parameters shared by the Bayesian optimization tests; "z" has a
# single value only
tune_params = OrderedDict([
    ("x", [1, 2, 3]),
    ("y", [4, 5, 6]),
    ("z", [7]),
])

strategy_options = {"popsize": 0, "max_fevals": 10}
tuning_options = Options({
    "restrictions": [],
    "tune_params": tune_params,
    "strategy_options": strategy_options,
})
# scaling and snapping are switched on after construction
tuning_options["scaling"] = True
tuning_options["snap"] = True
max_threads = 1024

# initialize required data
# NOTE: itertools must be imported at the top of the file; without it this
# line raises NameError as soon as the module is imported.
# The full search space is the Cartesian product of all tunable values.
parameter_space = list(itertools.product(*tune_params.values()))

# only the step size is needed here, bounds and starting point are discarded
_, _, eps = minimize.get_bounds_x0_eps(tuning_options)

# lookup tables between original and normalized parameter values
original_to_normalized, normalized_to_original = bayes_opt.generate_normalized_param_dicts(
    tune_params, eps)
normalized_parameter_space = bayes_opt.normalize_parameter_space(
    parameter_space, tune_params, original_to_normalized)
pruned_parameter_space, removed_tune_params = bayes_opt.prune_parameter_space(
    normalized_parameter_space, tuning_options, tune_params,
    original_to_normalized)