def func(args):
    counter.append(1)
    if len(counter) % 50 == 0:
        print(len(counter), flush=True)
    val = minimize._cost_func(args, kernel_options, tuning_options, runner, results)
    return val
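# For reference, a minimal sketch of how a wrapper like the one above is
# typically wired up: `counter`, `kernel_options`, `tuning_options`, `runner`,
# and `results` are captured from an enclosing scope, and `func` is handed to
# scipy.optimize.minimize as the objective. The starting point and method
# below are assumptions for illustration, not taken from the snippet above.
import scipy.optimize

counter = []   # shared progress counter, appended to on every evaluation
results = []   # filled by minimize._cost_func as configurations are measured

x0 = [1, 1]    # hypothetical starting point in the parameter space
opt_result = scipy.optimize.minimize(func, x0, method="L-BFGS-B")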
def evaluate_objective_function(self, param_config: tuple) -> float:
    """ Evaluates the objective function """
    param_config = self.unprune_param_config(param_config)
    denormalized_param_config = self.denormalize_param_config(param_config)
    if not util.config_valid(denormalized_param_config, self.tuning_options, self.max_threads):
        return self.invalid_value
    val = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
    self.fevals += 1
    return val
def test__cost_func():
    x = [1, 4]
    kernel_options = None
    tuning_options = Options(scaling=False, snap=False, tune_params=tune_params,
                             restrictions=None, strategy_options={}, cache={})
    runner = fake_runner()
    results = []

    time = minimize._cost_func(x, kernel_options, tuning_options, runner, results)
    assert time == 5

    tuning_options.cache["1,4"] = OrderedDict([("x", 1), ("y", 4), ("time", 5)])
    time = minimize._cost_func(x, kernel_options, tuning_options, runner, results)
    assert time == 5

    # check if 1st run is properly cached and runner is only called once
    assert runner.run.call_count == 1

    # check if restrictions are properly handled
    restrictions = ["False"]
    tuning_options = Options(scaling=False, snap=False, tune_params=tune_params,
                             restrictions=restrictions, strategy_options={},
                             verbose=True, cache={})
    time = minimize._cost_func(x, kernel_options, tuning_options, runner, results)
    assert time == 1e20
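# The test above relies on a fake_runner helper. A minimal sketch of such a
# stub, assuming _cost_func calls runner.run(...) and reads back a list of
# records carrying a "time" field (the exact return shape is an assumption):
from unittest.mock import Mock

def fake_runner():
    # stub runner: run() always reports a single result with time == 5, so the
    # test can assert on the returned time as well as on run.call_count
    runner = Mock()
    runner.run.return_value = ([{"x": 1, "y": 4, "time": 5}], {})
    return runner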
def visualize_after_opt(self):
    """ Visualize the model after the optimization """
    print(self.__model.kernel_.get_params())
    print(self.__model.log_marginal_likelihood())
    import matplotlib.pyplot as plt
    _, mu, std = self.predict_list(self.searchspace)
    brute_force_observations = list()
    for param_config in self.searchspace:
        obs = minimize._cost_func(param_config, self.kernel_options, self.tuning_options, self.runner, self.results)
        if obs == self.invalid_value:
            obs = None
        brute_force_observations.append(obs)
    x_axis = range(len(mu))
    plt.fill_between(x_axis, mu - std, mu + std, alpha=0.2, antialiased=True)
    plt.plot(x_axis, mu, label="predictions", linestyle=' ', marker='.')
    plt.plot(x_axis, brute_force_observations, label="actual", linestyle=' ', marker='.')
    plt.legend()
    plt.show()
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: dict

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: dict

    :param tuning_options: A dictionary with all options regarding the tuning
        process.
    :type tuning_options: dict

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    results = []
    cache = {}

    # SA works with real parameter values and does not need scaling
    tuning_options["scaling"] = False
    args = (kernel_options, tuning_options, runner, results, cache)
    tune_params = tuning_options.tune_params

    # optimization parameters
    T = 1.0
    T_min = 0.001
    alpha = 0.9
    niter = 20

    # generate random starting point and evaluate cost
    pos = []
    for i, _ in enumerate(tune_params.keys()):
        pos.append(random_val(i, tune_params))
    old_cost = _cost_func(pos, *args)

    if tuning_options.verbose:
        c = 0

    # main optimization loop
    while T > T_min:
        if tuning_options.verbose:
            print("iteration: ", c, "T", T, "cost: ", old_cost)
            c += 1

        for i in range(niter):
            new_pos = neighbor(pos, tune_params)
            new_cost = _cost_func(new_pos, *args)

            ap = acceptance_prob(old_cost, new_cost, T)
            r = random.random()

            if ap > r:
                if tuning_options.verbose:
                    print("new position accepted", new_pos, new_cost, 'old:', pos, old_cost, 'ap', ap, 'r', r, 'T', T)
                pos = new_pos
                old_cost = new_cost

        T = T * alpha

    return results, runner.dev.get_environment()
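# The loop above leans on acceptance_prob and neighbor, defined elsewhere. As
# a point of reference, a Metropolis-style acceptance probability could look
# like the sketch below; the relative scaling by old_cost and the handling of
# the 1e20 invalid-value sentinel are assumptions, not necessarily the
# module's exact definition.
import math

def acceptance_prob(old_cost, new_cost, T):
    # always move away from an invalid starting point
    if old_cost == 1e20:
        return 1.0
    # never move onto an invalid configuration once a valid one is known
    if new_cost == 1e20:
        return 0.0
    # always accept improvements
    if new_cost < old_cost:
        return 1.0
    # otherwise accept with a Boltzmann probability that shrinks as T cools
    return math.exp(((old_cost - new_cost) / old_cost) / T)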
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process.
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    dna_size = len(tuning_options.tune_params.keys())

    pop_size = 20
    generations = 100
    tuning_options["scaling"] = False
    tune_params = tuning_options.tune_params

    population = random_population(dna_size, pop_size, tune_params)

    best_time = 1e20
    all_results = []
    cache = {}

    for generation in range(generations):
        if tuning_options.verbose:
            print("Generation %d, best_time %f" % (generation, best_time))

        # determine fitness of population members
        weighted_population = []
        for dna in population:
            time = _cost_func(dna, kernel_options, tuning_options, runner, all_results, cache)
            weighted_population.append((dna, time))
        population = []

        # 'best_time' is used only for printing
        if tuning_options.verbose and all_results:
            best_time = min(all_results, key=lambda x: x["time"])["time"]

        # population is sorted such that better configs have a higher chance of reproducing
        weighted_population.sort(key=lambda x: x[1])

        # crossover and mutate
        for _ in range(pop_size // 2):
            ind1 = weighted_choice(weighted_population)
            ind2 = weighted_choice(weighted_population)
            ind1, ind2 = crossover(ind1, ind2)
            population.append(mutate(ind1, dna_size, tune_params))
            population.append(mutate(ind2, dna_size, tune_params))

    return all_results, runner.dev.get_environment()
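# This variant calls weighted_choice(weighted_population) once per parent. A
# minimal rank-based sketch, under the assumption that the population arrives
# sorted best-first; the actual weighting scheme in the module may differ.
import random

def weighted_choice(population):
    # give linearly decaying weights by rank so better configurations are
    # more likely to be picked as parents
    n = len(population)
    weights = [n - i for i in range(n)]
    dna, _time = random.choices(population, weights=weights, k=1)[0]
    return dna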
def func(**kwargs):
    args = [kwargs[key] for key in tuning_options.tune_params.keys()]
    return -1.0 * minimize._cost_func(args, kernel_options, tuning_options, runner, results)
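# The -1.0 factor suggests this wrapper feeds a maximizing optimizer. A sketch
# of how it could be hooked up to the bayesian-optimization package; the
# bounds construction and the optimizer settings are assumptions for
# illustration.
from bayes_opt import BayesianOptimization

bounds = {key: (min(values), max(values))
          for key, values in tuning_options.tune_params.items()}

optimizer = BayesianOptimization(f=func, pbounds=bounds, verbose=0)
optimizer.maximize(init_points=3, n_iter=25)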
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process.
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    dna_size = len(tuning_options.tune_params.keys())

    options = tuning_options.strategy_options
    pop_size = options.get("popsize", 20)
    generations = options.get("maxiter", 50)
    crossover = supported_methods[options.get("method", "uniform")]
    mutation_chance = options.get("mutation_chance", 10)
    max_fevals = options.get("max_fevals", 100)
    max_threads = runner.dev.max_threads

    tuning_options["scaling"] = False
    tune_params = tuning_options.tune_params

    best_time = 1e20
    all_results = []
    unique_results = {}

    population = random_population(pop_size, tune_params, tuning_options, max_threads)

    for generation in range(generations):

        # determine fitness of population members
        weighted_population = []
        for dna in population:
            time = _cost_func(dna, kernel_options, tuning_options, runner, all_results)
            weighted_population.append((dna, time))

        # population is sorted such that better configs have a higher chance of reproducing
        weighted_population.sort(key=lambda x: x[1])

        # 'best_time' is used only for printing
        if tuning_options.verbose and all_results:
            best_time = min(all_results, key=lambda x: x["time"])["time"]
        if tuning_options.verbose:
            print("Generation %d, best_time %f" % (generation, best_time))

        population = []

        unique_results.update({",".join([str(i) for i in dna]): time
                               for dna, time in weighted_population})
        if len(unique_results) >= max_fevals:
            break

        # crossover and mutate
        while len(population) < pop_size:
            dna1, dna2 = weighted_choice(weighted_population, 2)
            children = crossover(dna1, dna2)
            for child in children:
                child = mutate(child, tune_params, mutation_chance, tuning_options, max_threads)
                if child not in population and util.config_valid(child, tuning_options, max_threads):
                    population.append(child)
                if len(population) >= pop_size:
                    break

        # could combine old + new generation here and do a selection

    return all_results, runner.dev.get_environment()
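# random_population(pop_size, tune_params, tuning_options, max_threads) is
# assumed to draw distinct random valid configurations; a plausible sketch:
import random

def random_population(pop_size, tune_params, tuning_options, max_threads):
    # keep sampling until we have pop_size distinct valid configurations
    population = []
    while len(population) < pop_size:
        dna = [random.choice(values) for values in tune_params.values()]
        if dna not in population and util.config_valid(dna, tuning_options, max_threads):
            population.append(dna)
    return population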
def tune(runner, kernel_options, device_options, tuning_options):
    """ Find the best performing kernel configuration in the parameter space

    :param runner: A runner from kernel_tuner.runners
    :type runner: kernel_tuner.runner

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param device_options: A dictionary with all options for the device
        on which the kernel should be tuned.
    :type device_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process.
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    dna_size = len(tuning_options.tune_params.keys())

    options = tuning_options.strategy_options
    pop_size = options.get("popsize", 20)
    generations = options.get("maxiter", 100)
    crossover = supported_methods[options.get("method", "uniform")]
    mutation_chance = options.get("mutation_chance", 10)

    tuning_options["scaling"] = False
    tune_params = tuning_options.tune_params

    best_time = 1e20
    all_results = []

    population = random_population(pop_size, tune_params)

    for generation in range(generations):

        # optionally enable something to remove duplicates and increase diversity;
        # leads to longer execution times, but might improve robustness
        # population = ensure_diversity(population, pop_size, tune_params)

        if tuning_options.verbose:
            print("Generation %d, best_time %f" % (generation, best_time))

        # determine fitness of population members
        weighted_population = []
        for dna in population:
            time = _cost_func(dna, kernel_options, tuning_options, runner, all_results)
            weighted_population.append((dna, time))
        population = []

        # 'best_time' is used only for printing
        if tuning_options.verbose and all_results:
            best_time = min(all_results, key=lambda x: x["time"])["time"]

        # population is sorted such that better configs have a higher chance of reproducing
        weighted_population.sort(key=lambda x: x[1])

        # crossover and mutate
        for _ in range(pop_size // 2):
            dna1, dna2 = weighted_choice(weighted_population, 2)
            dna1, dna2 = crossover(dna1, dna2)
            population.append(mutate(dna1, tune_params, mutation_chance))
            population.append(mutate(dna2, tune_params, mutation_chance))

    return all_results, runner.dev.get_environment()
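# supported_methods defaults to "uniform" in both variants above. A sketch of
# a textbook uniform crossover, assumed rather than taken from the module:
import random

def uniform_crossover(dna1, dna2):
    # swap each gene between the parents independently with probability 0.5
    child1, child2 = [], []
    for gene1, gene2 in zip(dna1, dna2):
        if random.random() < 0.5:
            gene1, gene2 = gene2, gene1
        child1.append(gene1)
        child2.append(gene2)
    return child1, child2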
def hillclimb(pos, max_fevals, all_results, unique_results, kernel_options, tuning_options, runner):
    """ Simple hillclimbing search until max_fevals is reached or no improvement is found """
    tune_params = tuning_options.tune_params
    max_threads = runner.dev.max_threads

    # measure start point time
    time = _cost_func(pos, kernel_options, tuning_options, runner, all_results)

    # starting a new hill climbing search, no need to remember past best
    best_global = best = time

    # store the start pos before hill climbing
    start_pos = pos[:]

    found_improved = True
    while found_improved:
        found_improved = False
        current_results = []

        pos = start_pos[:]

        index = 0
        # in each dimension see the possible values
        for values in tune_params.values():

            # for each value in this dimension
            for value in values:
                pos[index] = value

                # check restrictions
                # if restrictions and not util.check_restrictions(restrictions, pos, tune_params.keys(), False):
                #     continue
                if not util.config_valid(pos, tuning_options, max_threads):
                    continue

                # get time for this position
                time = _cost_func(pos, kernel_options, tuning_options, runner, current_results)
                if time < best:
                    best = time
                    best_pos = pos[:]
                    # greedily replace start_pos with pos to continue from this point
                    start_pos = pos[:]

                unique_results.update({
                    ",".join([str(v) for k, v in record.items() if k in tune_params]): record["time"]
                    for record in current_results
                })
                fevals = len(unique_results)
                if fevals >= max_fevals:
                    all_results += current_results
                    return

            # restore and move to next dimension
            pos[index] = start_pos[index]
            index = index + 1

        # see if there was improvement, update start_pos and set found_improved to True
        if best < best_global:
            found_improved = True
            start_pos = best_pos
            best_global = best

        # append current_results to all_results
        all_results += current_results
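# hillclimb returns once max_fevals is exhausted or a local optimum is found,
# so a driver typically restarts it from fresh random positions. A minimal
# multi-start sketch; the driver name and the reuse of the random_population
# helper (and its signature) are assumptions for illustration.
def tune_hillclimbers(runner, kernel_options, tuning_options, max_fevals=100):
    all_results = []
    unique_results = {}
    while len(unique_results) < max_fevals:
        # restart from a fresh random valid position
        pos = random_population(1, tuning_options.tune_params, tuning_options,
                                runner.dev.max_threads)[0]
        hillclimb(pos, max_fevals, all_results, unique_results,
                  kernel_options, tuning_options, runner)
    return all_results, runner.dev.get_environment()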