def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Uses the BayesianOptimization optimizer on the scaled parameter space.
    That optimizer maximizes its objective, so the value returned by
    minimize._cost_func is negated before it is handed back.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process. The strategy_options entry may supply "popsize"
        (default 20), "max_fevals" (default 100), "method" (default "ucb"),
        "kappa" (default 2.576) and "xi" (default 0.0).
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    :raises ImportError: if the optional Bayesian Optimization dependency
        is flagged as not present.

    """
    if not bayes_opt_present:
        raise ImportError("Error: optional dependency Bayesian Optimization not installed")

    strategy_options = tuning_options.strategy_options

    # number of initial points and number of optimization iterations
    init_points = strategy_options.get("popsize", 20)
    n_iter = strategy_options.get("max_fevals", 100)

    # acquisition settings, forwarded unchanged to optimizer.maximize
    acq = strategy_options.get("method", "ucb")
    kappa = strategy_options.get("kappa", 2.576)
    xi = strategy_options.get("xi", 0.0)

    # must be set before the bounds are requested below
    tuning_options["scaling"] = True

    results = []

    def negated_cost(**params):
        # the optimizer passes parameters by name; restore tune_params order
        ordered = [params[name] for name in tuning_options.tune_params.keys()]
        cost = minimize._cost_func(ordered, kernel_options, tuning_options, runner, results)
        return -1.0 * cost

    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)
    pbounds = OrderedDict(zip(tuning_options.tune_params.keys(), bounds))

    verbose = 2 if tuning_options.verbose else 0

    optimizer = BayesianOptimization(f=negated_cost, pbounds=pbounds, verbose=verbose)
    optimizer.maximize(init_points=init_points, n_iter=n_iter, acq=acq, kappa=kappa, xi=xi)

    if tuning_options.verbose:
        print(optimizer.max)

    environment = runner.dev.get_environment()
    return results, environment
Beispiel #2
0
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Minimizes minimize._cost_func over the scaled parameter space with
    gp_minimize. Every 50th evaluation the running evaluation count is
    printed, regardless of the verbose setting.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process. The strategy_options entry may supply "popsize"
        (default 20), "max_fevals" (default 100) and "method"
        (acquisition function, default "gp_hedge").
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    :raises ImportError: if the optional dependency is flagged as not present.

    """
    if not bayes_opt_present:
        raise ImportError(
            "Error: optional dependency Bayesian Optimization not installed")

    strategy_options = tuning_options.strategy_options
    init_points = strategy_options.get("popsize", 20)
    n_iter = strategy_options.get("max_fevals", 100)
    acq = strategy_options.get("method", "gp_hedge")

    # must be set before the bounds are requested below
    tuning_options["scaling"] = True

    results = []
    num_evaluations = 0

    def objective(x):
        # count every call so progress can be reported every 50 evaluations
        nonlocal num_evaluations
        num_evaluations += 1
        if num_evaluations % 50 == 0:
            print(num_evaluations, flush=True)
        return minimize._cost_func(x, kernel_options, tuning_options, runner,
                                   results)

    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)
    opt_result = gp_minimize(objective,
                             bounds,
                             acq_func=acq,
                             n_calls=n_iter,
                             n_initial_points=init_points,
                             n_jobs=-1)

    if tuning_options.verbose:
        print(opt_result)

    environment = runner.dev.get_environment()
    return results, environment
Beispiel #3
0
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Runs scipy.optimize.basinhopping on _cost_func, using the method named by
    tuning_options.method as the local minimizer.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Its "method" entry selects the local minimizer and its
        "verbose" entry controls the progress output.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []
    cache = {}

    method = tuning_options.method

    # scaling makes a single step size 'eps' meaningful across all variables
    tuning_options["scaling"] = True
    bounds, x0, eps = get_bounds_x0_eps(tuning_options)

    # keyword arguments for the local minimizer
    kwargs = setup_method_arguments(method, bounds)
    kwargs['options'] = setup_method_options(method, tuning_options)

    # copy, then add the entries basinhopping forwards to the local minimizer
    minimizer_kwargs = dict(**kwargs)
    minimizer_kwargs.update(
        method=method,
        args=(kernel_options, tuning_options, runner, results, cache),
    )

    opt_result = scipy.optimize.basinhopping(
        _cost_func,
        x0,
        stepsize=eps,
        minimizer_kwargs=minimizer_kwargs,
        disp=tuning_options.verbose,
    )

    if tuning_options.verbose:
        print(opt_result.message)

    environment = runner.dev.get_environment()
    return results, environment
Beispiel #4
0
def test_get_bounds_x0_eps():
    """Check get_bounds_x0_eps for one parameter, with and without scaling."""
    tune_params = OrderedDict()
    tune_params['x'] = [0, 1, 2, 3, 4]

    tuning_options = Options()
    tuning_options["tune_params"] = tune_params

    # (scaling flag, expected bounds, expected x0, expected eps)
    expectations = [
        (True, [(0.0, 1.0)], [0.5], 0.2),
        (False, [(0, 4)], [2.0], 1.0),
    ]

    for scaling, want_bounds, want_x0, want_eps in expectations:
        tuning_options["scaling"] = scaling

        bounds, x0, eps = minimize.get_bounds_x0_eps(tuning_options)

        assert bounds == want_bounds
        assert x0 == want_x0
        assert eps == want_eps
def test_get_bounds_x0_eps():
    """Verify bounds, starting point and step size for a single parameter.

    The same five-value parameter is checked first with scaling enabled and
    then with scaling disabled.
    """
    params = OrderedDict()
    params['x'] = [0, 1, 2, 3, 4]

    options = Options()
    options["scaling"] = True
    options["tune_params"] = params

    # scaled search space
    scaled_bounds, scaled_x0, scaled_eps = minimize.get_bounds_x0_eps(options)
    assert scaled_bounds == [(0.0, 1.0)]
    assert scaled_x0 == [0.5]
    assert scaled_eps == 0.2

    # unscaled search space
    options["scaling"] = False
    raw_bounds, raw_x0, raw_eps = minimize.get_bounds_x0_eps(options)
    assert raw_bounds == [(0, 4)]
    assert raw_x0 == [2.0]
    assert raw_eps == 1.0
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Global optimization with scipy.optimize.basinhopping; the local
    minimization steps use the method given by tuning_options.method.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process. The "method" and "verbose" entries are read here.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []
    cache = {}
    method = tuning_options.method

    # work on scaled variables so that 'eps' is relevant for every variable
    tuning_options["scaling"] = True
    bounds, x0, eps = get_bounds_x0_eps(tuning_options)

    method_kwargs = setup_method_arguments(method, bounds)
    method_options = setup_method_options(method, tuning_options)
    method_kwargs['options'] = method_options

    # extra positional arguments handed to _cost_func on every evaluation
    cost_args = (kernel_options, tuning_options, runner, results, cache)

    minimizer_kwargs = dict(**method_kwargs)
    minimizer_kwargs["method"] = method
    minimizer_kwargs["args"] = cost_args

    opt_result = scipy.optimize.basinhopping(
        _cost_func,
        x0,
        stepsize=eps,
        minimizer_kwargs=minimizer_kwargs,
        disp=tuning_options.verbose,
    )

    if tuning_options.verbose:
        print(opt_result.message)

    return results, runner.dev.get_environment()
Beispiel #7
0
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Particle swarm strategy: each iteration evaluates every particle with
    _cost_func, tracks the best time seen so far, and then moves all
    particles using that global best position.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process. The strategy_options entry may supply "popsize"
        (default 20), "maxiter" (default 100), "w" (default 0.5),
        "c1" (default 2.0) and "c2" (default 1.0).
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []

    # PSO moves through the space using velocities, so use scaled variables
    tuning_options["scaling"] = True

    # get_bounds_x0_eps is used (rather than get_bounds) because of scaling
    bounds, _, _ = get_bounds_x0_eps(tuning_options)

    args = (kernel_options, tuning_options, runner, results)

    strategy_options = tuning_options.strategy_options
    num_particles = strategy_options.get("popsize", 20)
    maxiter = strategy_options.get("maxiter", 100)
    w = strategy_options.get("w", 0.5)       # inertia constant
    c1 = strategy_options.get("c1", 2.0)     # cognitive constant
    c2 = strategy_options.get("c2", 1.0)     # social constant

    best_time_global = 1e20
    best_position_global = []

    # create the swarm
    swarm = [Particle(bounds, args) for _ in range(num_particles)]

    for iteration in range(maxiter):
        if tuning_options.verbose:
            print("start iteration ", iteration, "best time global", best_time_global)

        # evaluate all particles, remembering the best one seen so far
        for particle in swarm:
            particle.evaluate(_cost_func)

            if particle.time <= best_time_global:
                best_position_global = particle.position
                best_time_global = particle.time

        # move all particles
        for particle in swarm:
            particle.update_velocity(best_position_global, w, c1, c2)
            particle.update_position(bounds)

    if tuning_options.verbose:
        print('Final result:')
        print(best_position_global)
        print(best_time_global)

    environment = runner.dev.get_environment()
    return results, environment
Beispiel #8
0
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Particle swarm strategy with a fixed swarm of 20 particles and 100
    iterations. Every iteration evaluates each particle with _cost_func,
    tracks the best time seen so far, and then moves all particles.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []
    cache = {}

    # PSO moves through the space using velocities, so use scaled variables
    tuning_options["scaling"] = True

    # get_bounds_x0_eps is used (rather than get_bounds) because of scaling
    bounds, _, _ = get_bounds_x0_eps(tuning_options)

    args = (kernel_options, tuning_options, runner, results, cache)

    num_particles = 20
    maxiter = 100

    best_time_global = 1e20
    best_position_global = []

    # create the swarm
    swarm = [Particle(bounds, args) for _ in range(num_particles)]

    for iteration in range(maxiter):
        if tuning_options.verbose:
            print("start iteration ", iteration, "best time global", best_time_global)

        # evaluate all particles, remembering the best one seen so far
        for particle in swarm:
            particle.evaluate(_cost_func)

            if particle.time <= best_time_global:
                best_position_global = particle.position
                best_time_global = particle.time

        # move all particles
        for particle in swarm:
            particle.update_velocity(best_position_global)
            particle.update_position(bounds)

    if tuning_options.verbose:
        print('Final result:')
        print(best_position_global)
        print(best_time_global)

    environment = runner.dev.get_environment()
    return results, environment
Beispiel #9
0
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Firefly strategy: after an initial evaluation of all fireflies, every
    iteration compares all pairs and moves each firefly towards every
    brighter one, re-evaluating it with _cost_func after each move.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process. The strategy_options entry may supply "popsize"
        (default 20), "maxiter" (default 100), "B0" (default 1.0),
        "gamma" (default 1.0) and "alpha" (default 0.2).
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """

    results = []

    # scale variables in x because the swarm moves through the space in small steps
    tuning_options["scaling"] = True

    # using this instead of get_bounds because scaling is used
    bounds, _, _ = get_bounds_x0_eps(tuning_options)

    args = (kernel_options, tuning_options, runner, results)

    strategy_options = tuning_options.strategy_options
    num_particles = strategy_options.get("popsize", 20)
    maxiter = strategy_options.get("maxiter", 100)

    # parameters needed by the Firefly Algorithm
    B0 = strategy_options.get("B0", 1.0)
    gamma = strategy_options.get("gamma", 1.0)
    alpha = strategy_options.get("alpha", 0.2)

    best_time_global = 1e20
    best_position_global = []

    # init particle swarm
    swarm = [Firefly(bounds, args) for _ in range(num_particles)]

    # compute initial intensities
    for firefly in swarm:
        firefly.compute_intensity(_cost_func)

        # Record the global best here as well: a firefly only moves towards a
        # brighter one, so the brightest firefly never moves and its result
        # would otherwise never be reflected in the reported global best.
        if firefly.time <= best_time_global:
            best_position_global = firefly.position
            best_time_global = firefly.time

    for c in range(maxiter):
        if tuning_options.verbose:
            print("start iteration ", c, "best time global", best_time_global)

        # compare all to all and compute attractiveness
        for firefly in swarm:
            for other in swarm:

                if firefly.intensity < other.intensity:
                    dist = firefly.distance_to(other)
                    beta = B0 * np.exp(-gamma * dist * dist)

                    firefly.move_towards(other, beta, alpha)
                    firefly.compute_intensity(_cost_func)

                    # update global best if needed, actually only used for printing
                    if firefly.time <= best_time_global:
                        best_position_global = firefly.position
                        best_time_global = firefly.time

        # the swarm is only reordered between iterations, never during one
        swarm.sort(key=lambda x: x.time)

    if tuning_options.verbose:
        print('Final result:')
        print(best_position_global)
        print(best_time_global)

    return results, runner.dev.get_environment()
Beispiel #10
0
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Uses the BayesianOptimization optimizer on the scaled parameter space.
    That optimizer maximizes its objective, so the value returned by
    minimize._cost_func is negated before it is handed back.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process. The strategy_options entry may supply "method"
        (default "poi"), "kappa" (default 2.576), "xi" (default 0.0),
        "popsize" (default 5), "maxiter" (default 25) and "alpha"
        (default 1e-6).
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    :raises ImportError: if the optional Bayesian Optimization dependency
        is flagged as not present.

    """

    # This strategy seems to need some hyperparameter tuning before it beats
    # random sampling for auto-tuning GPU kernels.
    #
    # alpha, normalize_y, and n_restarts_optimizer are options of
    # https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html
    # The defaults used by Bayesian Optimization are:
    #   alpha=1e-6,  # 1e-3 recommended for very noisy or discrete search spaces
    #   n_restarts_optimizer=5,
    #   normalize_y=True,
    #
    # Several exploration friendly settings (package default is acq="ucb", kappa=2.576):
    #   acq="poi", xi=1e-1
    #   acq="ei", xi=1e-1
    #   acq="ucb", kappa=10

    if not bayes_opt_present:
        raise ImportError(
            "Error: optional dependency Bayesian Optimization not installed")

    # hyperparameters, each overridable through strategy_options
    strategy_options = tuning_options.strategy_options
    acq = strategy_options.get("method", "poi")
    kappa = strategy_options.get("kappa", 2.576)
    xi = strategy_options.get("xi", 0.0)
    init_points = strategy_options.get("popsize", 5)
    n_iter = strategy_options.get("maxiter", 25)
    alpha = strategy_options.get("alpha", 1e-6)

    # must be set before the bounds are requested below
    tuning_options["scaling"] = True

    results = []

    def negated_cost(**params):
        # the optimizer passes parameters by name; restore tune_params order
        ordered = [params[name] for name in tuning_options.tune_params.keys()]
        cost = minimize._cost_func(ordered, kernel_options, tuning_options,
                                   runner, results)
        return -1.0 * cost

    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)
    pbounds = OrderedDict(zip(tuning_options.tune_params.keys(), bounds))

    verbose = 2 if tuning_options.verbose else 0

    optimizer = BayesianOptimization(
        f=negated_cost,
        pbounds=pbounds,
        verbose=verbose,
        alpha=alpha,
    )
    optimizer.maximize(
        init_points=init_points,
        n_iter=n_iter,
        acq=acq,
        kappa=kappa,
        xi=xi,
    )

    if tuning_options.verbose:
        print(optimizer.max)

    environment = runner.dev.get_environment()
    return results, environment
Beispiel #11
0
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Builds the full list of valid configurations, normalizes it, optionally
    prunes it, and hands it to the BayesianOptimization class for at most
    max_fevals evaluations.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Allows setting hyperparameters via the strategy_options key:
        "max_fevals" (default 100) and "pruneparameterspace" (default True).
        Note that verbose is switched off on this object when restrictions
        are set.
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    :raises ImportError: if the optional dependencies are flagged as not present.
    :raises ValueError: if fewer than two configurations remain after the
        validity check.

    """

    strategy_options = tuning_options.strategy_options
    max_fevals = strategy_options.get("max_fevals", 100)
    prune_parameterspace = strategy_options.get("pruneparameterspace", True)
    if not bayes_opt_present:
        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed, please install scikit-learn and scikit-optimize")

    # epsilon for scaling should be the evenly spaced distance between the largest set of parameter options in an interval [0,1]
    tune_params = tuning_options.tune_params
    tuning_options["scaling"] = True
    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)

    # all combinations of the tunable parameter values
    candidates = itertools.product(*tune_params.values())

    # verbose is turned off when restrictions are present; the validity check
    # below runs for every candidate either way
    if tuning_options.restrictions is not None:
        tuning_options.verbose = False
    parameter_space = [
        config for config in candidates
        if util.config_valid(config, tuning_options, runner.dev.max_threads)
    ]

    if not parameter_space:
        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
    if len(parameter_space) == 1:
        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")

    # normalize search space to [0,1]
    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)

    if prune_parameterspace:
        # drop dimensions that have a constant parameter
        parameter_space, removed_tune_params = prune_parameter_space(parameter_space, tuning_options, tune_params, normalize_dict)
    else:
        # nothing removed: one None placeholder per tunable parameter
        parameter_space = list(parameter_space)
        removed_tune_params = [None] * len(tune_params)

    # initialize and optimize
    optimizer = BayesianOptimization(parameter_space, removed_tune_params, kernel_options, tuning_options, normalize_dict, denormalize_dict, runner)
    results = optimizer.optimize(max_fevals)

    environment = runner.dev.get_environment()
    return results, environment
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    Firefly strategy with a fixed swarm of 20 fireflies and 100 iterations.
    After an initial evaluation of all fireflies, every iteration compares
    all pairs and moves each firefly towards every brighter one,
    re-evaluating it with _cost_func after each move.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned. Not used by this strategy.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains a information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """

    results = []
    cache = {}

    # scale variables in x because the swarm moves through the space in small steps
    tuning_options["scaling"] = True

    # using this instead of get_bounds because scaling is used
    bounds, _, _ = get_bounds_x0_eps(tuning_options)

    args = (kernel_options, tuning_options, runner, results, cache)

    num_particles = 20
    maxiter = 100

    # parameters needed by the Firefly Algorithm
    B0 = 1.0
    gamma = 1.0
    alpha = 0.20

    best_time_global = 1e20
    best_position_global = []

    # init particle swarm
    swarm = [Firefly(bounds, args) for _ in range(num_particles)]

    # compute initial intensities
    for firefly in swarm:
        firefly.compute_intensity(_cost_func)

        # Record the global best here as well: a firefly only moves towards a
        # brighter one, so the brightest firefly never moves and its result
        # would otherwise never be reflected in the reported global best.
        if firefly.time <= best_time_global:
            best_position_global = firefly.position
            best_time_global = firefly.time

    for c in range(maxiter):
        if tuning_options.verbose:
            print("start iteration ", c, "best time global", best_time_global)

        # compare all to all and compute attractiveness
        for firefly in swarm:
            for other in swarm:

                if firefly.intensity < other.intensity:
                    dist = firefly.distance_to(other)
                    beta = B0 * np.exp(-gamma * dist * dist)

                    firefly.move_towards(other, beta, alpha)
                    firefly.compute_intensity(_cost_func)

                    # update global best if needed, actually only used for printing
                    if firefly.time <= best_time_global:
                        best_position_global = firefly.position
                        best_time_global = firefly.time

        # the swarm is only reordered between iterations, never during one
        swarm.sort(key=lambda x: x.time)

    if tuning_options.verbose:
        print('Final result:')
        print(best_position_global)
        print(best_time_global)

    return results, runner.dev.get_environment()
# Module-level fixture for the Bayesian Optimization tests: a small 3x3x1
# search space. NOTE(review): tune_params itself is created above this
# fragment - presumably as an OrderedDict; confirm.
tune_params["x"] = [1, 2, 3]
tune_params["y"] = [4, 5, 6]
# single-valued parameter, i.e. a dimension with a constant value
tune_params["z"] = [7]

# no initial population, at most 10 function evaluations
strategy_options = dict(popsize=0, max_fevals=10)
tuning_options = Options(
    dict(restrictions=[],
         tune_params=tune_params,
         strategy_options=strategy_options))
tuning_options["scaling"] = True
tuning_options["snap"] = True
max_threads = 1024

# initialize required data: the full cartesian product of all parameter
# values, its normalized form, and the pruned version of that normalized space
parameter_space = list(itertools.product(*tune_params.values()))
_, _, eps = minimize.get_bounds_x0_eps(tuning_options)
original_to_normalized, normalized_to_original = bayes_opt.generate_normalized_param_dicts(
    tune_params, eps)
normalized_parameter_space = bayes_opt.normalize_parameter_space(
    parameter_space, tune_params, original_to_normalized)
pruned_parameter_space, removed_tune_params = bayes_opt.prune_parameter_space(
    normalized_parameter_space, tuning_options, tune_params,
    original_to_normalized)

# initialize BO
# Lightweight stand-ins built from namedtuples: the runner only exposes
# runner.dev, and dev only exposes max_threads.
dev_dict = {'max_threads': max_threads}
dev = namedtuple('Struct', dev_dict.keys())(*dev_dict.values())
runner_dict = {'dev': dev}
runner = namedtuple('Struct', runner_dict.keys())(*runner_dict.values())
# empty kernel options
kernel_options = dict()
BO = BayesianOptimization(pruned_parameter_space, removed_tune_params,