def tune(runner, kernel_options, device_options, tuning_options):
    """ Tune the kernel using the Bayesian Optimization Python package

    The cost function is negated before it is handed to the optimizer,
    because the optimizer maximizes while the tuner wants the lowest cost.

    Recognized keys in ``tuning_options.strategy_options``: ``popsize``
    (passed as ``init_points``, default 20), ``max_fevals`` (passed as
    ``n_iter``, default 100), ``method`` (acquisition function, default
    ``"ucb"``), ``kappa`` (default 2.576) and ``xi`` (default 0.0).

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Its ``scaling`` entry is set to True by this function.
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    :raises ImportError: when the optional Bayesian Optimization dependency
        is flagged as missing.

    """
    if not bayes_opt_present:
        raise ImportError("Error: optional dependency Bayesian Optimization not installed")

    strategy_options = tuning_options.strategy_options
    init_points = strategy_options.get("popsize", 20)
    n_iter = strategy_options.get("max_fevals", 100)

    # fall back to the defaults of the Bayesian Optimization Python package
    acq = strategy_options.get("method", "ucb")
    kappa = strategy_options.get("kappa", 2.576)
    xi = strategy_options.get("xi", 0.0)

    tuning_options["scaling"] = True

    results = []

    def func(**kwargs):
        # the optimizer passes parameters by name; restore the tune_params order
        ordered_values = [kwargs[name] for name in tuning_options.tune_params.keys()]
        cost = minimize._cost_func(ordered_values, kernel_options, tuning_options, runner, results)
        # negate: the optimizer maximizes, we are after the minimum
        return -1.0 * cost

    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)
    pbounds = OrderedDict(zip(tuning_options.tune_params.keys(), bounds))

    verbosity = 2 if tuning_options.verbose else 0

    optimizer = BayesianOptimization(f=func, pbounds=pbounds, verbose=verbosity)
    optimizer.maximize(init_points=init_points, n_iter=n_iter, acq=acq, kappa=kappa, xi=xi)

    if tuning_options.verbose:
        print(optimizer.max)

    return results, runner.dev.get_environment()
def tune(runner, kernel_options, device_options, tuning_options):
    """ Tune the kernel using ``gp_minimize``

    Recognized keys in ``tuning_options.strategy_options``: ``popsize``
    (passed as ``n_initial_points``, default 20), ``max_fevals`` (passed as
    ``n_calls``, default 100) and ``method`` (passed as ``acq_func``,
    default ``"gp_hedge"``).

    A running count of cost-function evaluations is printed every 50
    evaluations, but only when ``tuning_options.verbose`` is set, in line
    with the rest of this function's output.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Its ``scaling`` entry is set to True by this function.
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    :raises ImportError: when the optional Bayesian Optimization dependency
        is flagged as missing.

    """
    if not bayes_opt_present:
        raise ImportError(
            "Error: optional dependency Bayesian Optimization not installed")

    init_points = tuning_options.strategy_options.get("popsize", 20)
    n_iter = tuning_options.strategy_options.get("max_fevals", 100)

    # default acquisition function as used by the Scikit Python package
    acq = tuning_options.strategy_options.get("method", "gp_hedge")

    tuning_options["scaling"] = True

    results = []
    num_evaluations = 0

    def func(args):
        """Cost function handed to the optimizer; also tracks progress."""
        nonlocal num_evaluations
        num_evaluations += 1
        # progress output is diagnostic only, so keep it behind the verbose flag
        if tuning_options.verbose and num_evaluations % 50 == 0:
            print(num_evaluations, flush=True)
        return minimize._cost_func(args, kernel_options, tuning_options, runner, results)

    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)

    res = gp_minimize(func, bounds, acq_func=acq, n_calls=n_iter,
                      n_initial_points=init_points, n_jobs=-1)

    if tuning_options.verbose:
        print(res)

    return results, runner.dev.get_environment()
def tune(runner, kernel_options, device_options, tuning_options):
    """ Tune the kernel using ``scipy.optimize.basinhopping``

    The local minimizer is selected by ``tuning_options.method``; its
    arguments and options come from ``setup_method_arguments`` and
    ``setup_method_options``.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Its ``scaling`` entry is set to True by this function.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []
    cache = {}

    method = tuning_options.method

    # work in scaled coordinates so that a single 'eps' is relevant for all variables
    tuning_options["scaling"] = True
    bounds, x0, eps = get_bounds_x0_eps(tuning_options)

    kwargs = setup_method_arguments(method, bounds)
    kwargs['options'] = setup_method_options(method, tuning_options)

    # everything the local minimizer needs, including the extra cost function arguments
    minimizer_kwargs = {
        **kwargs,
        "method": method,
        "args": (kernel_options, tuning_options, runner, results, cache),
    }

    opt_result = scipy.optimize.basinhopping(
        _cost_func,
        x0,
        stepsize=eps,
        minimizer_kwargs=minimizer_kwargs,
        disp=tuning_options.verbose,
    )

    if tuning_options.verbose:
        print(opt_result.message)

    return results, runner.dev.get_environment()
def test_get_bounds_x0_eps():
    """Check get_bounds_x0_eps for one parameter with values 0..4, scaled and unscaled."""
    tuning_options = Options()
    tuning_options["scaling"] = True
    tuning_options["tune_params"] = OrderedDict([('x', [0, 1, 2, 3, 4])])

    # with scaling the results are expected on the unit interval
    scaled_bounds, scaled_x0, scaled_eps = minimize.get_bounds_x0_eps(tuning_options)
    assert scaled_bounds == [(0.0, 1.0)]
    assert scaled_x0 == [0.5]
    assert scaled_eps == 0.2

    # without scaling the results are expected in the parameter's own range
    tuning_options["scaling"] = False
    raw_bounds, raw_x0, raw_eps = minimize.get_bounds_x0_eps(tuning_options)
    assert raw_bounds == [(0, 4)]
    assert raw_x0 == [2.0]
    assert raw_eps == 1.0
def tune(runner, kernel_options, device_options, tuning_options):
    """ Tune the kernel using Particle Swarm Optimization (PSO)

    Recognized keys in ``tuning_options.strategy_options``: ``popsize``
    (number of particles, default 20), ``maxiter`` (default 100), ``w``
    (inertia constant, default 0.5), ``c1`` (cognitive constant, default 2.0)
    and ``c2`` (social constant, default 1.0).

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Its ``scaling`` entry is set to True by this function.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []

    # PSO moves particles with velocities, so operate on scaled variables
    tuning_options["scaling"] = True

    # get_bounds_x0_eps is used rather than get_bounds because of the scaling
    bounds, _, _ = get_bounds_x0_eps(tuning_options)

    args = (kernel_options, tuning_options, runner, results)

    strategy_options = tuning_options.strategy_options
    num_particles = strategy_options.get("popsize", 20)
    maxiter = strategy_options.get("maxiter", 100)
    w = strategy_options.get("w", 0.5)    # inertia constant
    c1 = strategy_options.get("c1", 2.0)  # cognitive constant
    c2 = strategy_options.get("c2", 1.0)  # social constant

    best_time_global = 1e20
    best_position_global = []

    # create the swarm
    swarm = [Particle(bounds, args) for _ in range(num_particles)]

    for iteration in range(maxiter):
        if tuning_options.verbose:
            print("start iteration ", iteration, "best time global", best_time_global)

        # evaluate every particle and track the best position seen so far
        for particle in swarm:
            particle.evaluate(_cost_func)

            if particle.time <= best_time_global:
                best_position_global = particle.position
                best_time_global = particle.time

        # move every particle, steered by the global best
        for particle in swarm:
            particle.update_velocity(best_position_global, w, c1, c2)
            particle.update_position(bounds)

    if tuning_options.verbose:
        print('Final result:')
        print(best_position_global)
        print(best_time_global)

    return results, runner.dev.get_environment()
def tune(runner, kernel_options, device_options, tuning_options):
    """ Tune the kernel using Particle Swarm Optimization (PSO)

    Uses a fixed swarm of 20 particles for 100 iterations.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Its ``scaling`` entry is set to True by this function.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []
    cache = {}

    # PSO moves particles with velocities, so operate on scaled variables
    tuning_options["scaling"] = True

    # get_bounds_x0_eps is used rather than get_bounds because of the scaling
    bounds, _, _ = get_bounds_x0_eps(tuning_options)

    args = (kernel_options, tuning_options, runner, results, cache)

    num_particles = 20
    maxiter = 100

    best_time_global = 1e20
    best_position_global = []

    # create the swarm
    swarm = [Particle(bounds, args) for _ in range(num_particles)]

    for iteration in range(maxiter):
        if tuning_options.verbose:
            print("start iteration ", iteration, "best time global", best_time_global)

        # evaluate every particle and track the best position seen so far
        for particle in swarm:
            particle.evaluate(_cost_func)

            if particle.time <= best_time_global:
                best_position_global = particle.position
                best_time_global = particle.time

        # move every particle, steered by the global best
        for particle in swarm:
            particle.update_velocity(best_position_global)
            particle.update_position(bounds)

    if tuning_options.verbose:
        print('Final result:')
        print(best_position_global)
        print(best_time_global)

    return results, runner.dev.get_environment()
def tune(runner, kernel_options, device_options, tuning_options):
    """ Tune the kernel using the Firefly Algorithm

    Recognized keys in ``tuning_options.strategy_options``: ``popsize``
    (number of fireflies, default 20), ``maxiter`` (default 100), ``B0``
    (default 1.0), ``gamma`` (default 1.0) and ``alpha`` (default 0.2).

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Its ``scaling`` entry is set to True by this function.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []

    # fireflies move through the search space, so operate on scaled variables
    tuning_options["scaling"] = True

    # get_bounds_x0_eps is used rather than get_bounds because of the scaling
    bounds, _, _ = get_bounds_x0_eps(tuning_options)

    args = (kernel_options, tuning_options, runner, results)

    strategy_options = tuning_options.strategy_options
    num_particles = strategy_options.get("popsize", 20)
    maxiter = strategy_options.get("maxiter", 100)

    # parameters of the Firefly Algorithm
    B0 = strategy_options.get("B0", 1.0)
    gamma = strategy_options.get("gamma", 1.0)
    alpha = strategy_options.get("alpha", 0.2)

    best_time_global = 1e20
    best_position_global = []

    # create the swarm and compute the initial intensities
    swarm = [Firefly(bounds, args) for _ in range(num_particles)]
    for firefly in swarm:
        firefly.compute_intensity(_cost_func)

    for iteration in range(maxiter):
        if tuning_options.verbose:
            print("start iteration ", iteration, "best time global", best_time_global)

        # compare all to all: each firefly moves towards every brighter one
        for firefly in swarm:
            for other in swarm:
                if firefly.intensity < other.intensity:
                    dist = firefly.distance_to(other)
                    beta = B0 * np.exp(-gamma * dist * dist)

                    firefly.move_towards(other, beta, alpha)
                    firefly.compute_intensity(_cost_func)

                    # the global best is only tracked for printing
                    if firefly.time <= best_time_global:
                        best_position_global = firefly.position
                        best_time_global = firefly.time

        swarm.sort(key=lambda firefly: firefly.time)

    if tuning_options.verbose:
        print('Final result:')
        print(best_position_global)
        print(best_time_global)

    return results, runner.dev.get_environment()
def tune(runner, kernel_options, device_options, tuning_options):
    """ Tune the kernel using the Bayesian Optimization Python package

    The cost function is negated before it is handed to the optimizer,
    because the optimizer maximizes while the tuner wants the lowest cost.

    Recognized keys in ``tuning_options.strategy_options``: ``method``
    (acquisition function, default ``"poi"``), ``kappa`` (default 2.576),
    ``xi`` (default 0.0), ``popsize`` (passed as ``init_points``, default 5),
    ``maxiter`` (passed as ``n_iter``, default 25) and ``alpha``
    (default 1e-6).

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Its ``scaling`` entry is set to True by this function.
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    :raises ImportError: when the optional Bayesian Optimization dependency
        is flagged as missing.

    """
    # The Bayesian Optimization strategy seems to need some hyperparameter
    # tuning to become better than random sampling for auto-tuning GPU kernels.
    #
    # alpha, normalize_y, and n_restarts_optimizer are options to
    # https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html
    # The defaults used by Bayesian Optimization are:
    #   alpha=1e-6,  # 1e-3 recommended for very noisy or discrete search spaces
    #   n_restarts_optimizer=5,
    #   normalize_y=True,
    #
    # Several exploration-friendly settings are (default is acq="ucb", kappa=2.576):
    #   acq="poi", xi=1e-1
    #   acq="ei", xi=1e-1
    #   acq="ucb", kappa=10

    if not bayes_opt_present:
        raise ImportError(
            "Error: optional dependency Bayesian Optimization not installed")

    strategy_options = tuning_options.strategy_options

    # kappa and xi fall back to the defaults of the Bayesian Optimization Python package
    acq = strategy_options.get("method", "poi")
    kappa = strategy_options.get("kappa", 2.576)
    xi = strategy_options.get("xi", 0.0)
    init_points = strategy_options.get("popsize", 5)
    n_iter = strategy_options.get("maxiter", 25)
    alpha = strategy_options.get("alpha", 1e-6)

    tuning_options["scaling"] = True

    results = []

    def func(**kwargs):
        # the optimizer passes parameters by name; restore the tune_params order
        ordered_values = [kwargs[name] for name in tuning_options.tune_params.keys()]
        cost = minimize._cost_func(ordered_values, kernel_options, tuning_options, runner, results)
        # negate: the optimizer maximizes, we are after the minimum
        return -1.0 * cost

    bounds, _, _ = minimize.get_bounds_x0_eps(tuning_options)
    pbounds = OrderedDict(zip(tuning_options.tune_params.keys(), bounds))

    verbosity = 2 if tuning_options.verbose else 0

    optimizer = BayesianOptimization(f=func, pbounds=pbounds, verbose=verbosity, alpha=alpha)
    optimizer.maximize(init_points=init_points, n_iter=n_iter, acq=acq, kappa=kappa, xi=xi)

    if tuning_options.verbose:
        print(optimizer.max)

    return results, runner.dev.get_environment()
def tune(runner, kernel_options, device_options, tuning_options):
    """ Tune the kernel using Bayesian Optimization over a normalized search space

    The full Cartesian product of the tunable parameters is built, filtered
    by the restrictions (if any), normalized, optionally pruned, and then
    handed to ``BayesianOptimization``.

    Recognized keys in ``tuning_options.strategy_options``: ``max_fevals``
    (default 100) and ``pruneparameterspace`` (default True).

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Allows setting hyperparameters via the strategy_options key.
        Its ``scaling`` entry is set to True by this function, and ``verbose``
        is set to False when restrictions are present.
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    :raises ImportError: when the optional dependencies are flagged as missing.
    :raises ValueError: when fewer than two configurations remain after the
        restrictions check.

    """
    strategy_options = tuning_options.strategy_options
    max_fevals = strategy_options.get("max_fevals", 100)
    prune_parameterspace = strategy_options.get("pruneparameterspace", True)

    if not bayes_opt_present:
        raise ImportError("Error: optional dependencies for Bayesian Optimization not installed, please install scikit-learn and scikit-optimize")

    # epsilon for scaling should be the evenly spaced distance between the
    # largest set of parameter options in an interval [0,1]
    tune_params = tuning_options.tune_params
    tuning_options["scaling"] = True
    _, _, eps = minimize.get_bounds_x0_eps(tuning_options)

    # Cartesian product of all tunable parameters, kept lazy until filtered
    candidates = itertools.product(*tune_params.values())

    # apply the search space restrictions, if any
    if tuning_options.restrictions is not None:
        tuning_options.verbose = False
        candidates = (
            config for config in candidates
            if util.config_valid(config, tuning_options, runner.dev.max_threads)
        )
    parameter_space = list(candidates)

    if not parameter_space:
        raise ValueError("Empty parameterspace after restrictionscheck. Restrictionscheck is possibly too strict.")
    if len(parameter_space) == 1:
        raise ValueError(f"Only one configuration after restrictionscheck. Restrictionscheck is possibly too strict. Configuration: {parameter_space[0]}")

    # normalize the search space to [0,1]
    normalize_dict, denormalize_dict = generate_normalized_param_dicts(tune_params, eps)
    parameter_space = normalize_parameter_space(parameter_space, tune_params, normalize_dict)

    # optionally drop the dimensions that have a constant parameter
    if prune_parameterspace:
        parameter_space, removed_tune_params = prune_parameter_space(
            parameter_space, tuning_options, tune_params, normalize_dict)
    else:
        parameter_space = list(parameter_space)
        removed_tune_params = [None] * len(tune_params.keys())

    # initialize and optimize
    bo = BayesianOptimization(
        parameter_space,
        removed_tune_params,
        kernel_options,
        tuning_options,
        normalize_dict,
        denormalize_dict,
        runner,
    )
    results = bo.optimize(max_fevals)

    return results, runner.dev.get_environment()
def tune(runner, kernel_options, device_options, tuning_options):
    """ Tune the kernel using the Firefly Algorithm

    Uses a fixed swarm of 20 fireflies for 100 iterations, with
    B0 = 1.0, gamma = 1.0 and alpha = 0.20.

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process. Its ``scaling`` entry is set to True by this function.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []
    cache = {}

    # fireflies move through the search space, so operate on scaled variables
    tuning_options["scaling"] = True

    # get_bounds_x0_eps is used rather than get_bounds because of the scaling
    bounds, _, _ = get_bounds_x0_eps(tuning_options)

    args = (kernel_options, tuning_options, runner, results, cache)

    num_particles = 20
    maxiter = 100

    # parameters of the Firefly Algorithm
    B0 = 1.0
    gamma = 1.0
    alpha = 0.20

    best_time_global = 1e20
    best_position_global = []

    # create the swarm and compute the initial intensities
    swarm = [Firefly(bounds, args) for _ in range(num_particles)]
    for firefly in swarm:
        firefly.compute_intensity(_cost_func)

    for iteration in range(maxiter):
        if tuning_options.verbose:
            print("start iteration ", iteration, "best time global", best_time_global)

        # compare all to all: each firefly moves towards every brighter one
        for firefly in swarm:
            for other in swarm:
                if firefly.intensity < other.intensity:
                    dist = firefly.distance_to(other)
                    beta = B0 * np.exp(-gamma * dist * dist)

                    firefly.move_towards(other, beta, alpha)
                    firefly.compute_intensity(_cost_func)

                    # the global best is only tracked for printing
                    if firefly.time <= best_time_global:
                        best_position_global = firefly.position
                        best_time_global = firefly.time

        swarm.sort(key=lambda firefly: firefly.time)

    if tuning_options.verbose:
        print('Final result:')
        print(best_position_global)
        print(best_time_global)

    return results, runner.dev.get_environment()
tune_params["x"] = [1, 2, 3] tune_params["y"] = [4, 5, 6] tune_params["z"] = [7] strategy_options = dict(popsize=0, max_fevals=10) tuning_options = Options( dict(restrictions=[], tune_params=tune_params, strategy_options=strategy_options)) tuning_options["scaling"] = True tuning_options["snap"] = True max_threads = 1024 # initialize required data parameter_space = list(itertools.product(*tune_params.values())) _, _, eps = minimize.get_bounds_x0_eps(tuning_options) original_to_normalized, normalized_to_original = bayes_opt.generate_normalized_param_dicts( tune_params, eps) normalized_parameter_space = bayes_opt.normalize_parameter_space( parameter_space, tune_params, original_to_normalized) pruned_parameter_space, removed_tune_params = bayes_opt.prune_parameter_space( normalized_parameter_space, tuning_options, tune_params, original_to_normalized) # initialize BO dev_dict = {'max_threads': max_threads} dev = namedtuple('Struct', dev_dict.keys())(*dev_dict.values()) runner_dict = {'dev': dev} runner = namedtuple('Struct', runner_dict.keys())(*runner_dict.values()) kernel_options = dict() BO = BayesianOptimization(pruned_parameter_space, removed_tune_params,