def tune_kernel(kernel_name, kernel_string, problem_size, arguments, tune_params, grid_div_x=None, grid_div_y=None, grid_div_z=None, restrictions=None,
                answer=None, atol=1e-6, verify=None, verbose=False, lang=None, device=0, platform=0, smem_args=None, cmem_args=None, texmem_args=None,
                compiler=None, compiler_options=None, log=None, iterations=7, block_size_names=None, quiet=False, strategy=None, strategy_options=None,
                cache=None, metrics=None, simulation_mode=False, observers=None):
    """ Tune a kernel and return the results of all benchmarked configurations

    The call arguments are validated, grouped into kernel, tuning and device
    options, and handed to the selected search strategy, which drives a
    runner over the search space.

    Only the arguments whose handling is visible in this function are
    described here; all others are forwarded through the option groups.

    :param log: When set, logging is configured to write to a file named after
        the kernel plus a timestamp, using this value as the log level.

    :param iterations: Number of benchmark iterations, must be at least one.

    :param strategy: Name of the search strategy, must be a key of
        ``strategy_map``. Brute force is used when no strategy is given.

    :param strategy_options: Options for the strategy. ``"fraction"`` is only
        accepted together with ``strategy="random_sample"`` and ``"method"``
        must be one of the methods supported by the strategy.

    :param cache: Name of the cache file, ``".json"`` is appended when missing.

    :param simulation_mode: Use the simulation runner instead of the
        sequential runner.

    :returns: The results and the environment as returned by the strategy.

    :raises ValueError: For fewer than one iteration, an unknown strategy,
        ``"fraction"`` with a strategy other than random_sample, or an
        unsupported method.
    """
    if log:
        logging.basicConfig(filename=kernel_name + datetime.now().strftime('%Y%m%d-%H:%M:%S') + '.log', level=log)

    # NOTE: every name bound up to the locals() call below ends up in the
    # snapshot that is used to look up options by name; do not rename them
    kernel_source = core.KernelSource(kernel_string, lang)

    _check_user_input(kernel_name, kernel_source, arguments, block_size_names)

    # reject tune parameters that use forbidden names
    util.check_tune_params_list(tune_params)

    # verify that block_size_names are used as expected
    util.check_block_size_params_names_list(block_size_names, tune_params)

    if iterations < 1:
        raise ValueError("Iterations should be at least one!")

    # snapshot of the call arguments, taken before any of them is rebound
    call_args = locals()

    def _collect(option_names):
        # build one Options group from the snapshot of the call arguments
        return Options([(name, call_args[name]) for name in option_names])

    kernel_options = _collect(_kernel_options.keys())
    tuning_options = _collect(_tuning_options.keys())
    device_options = _collect(_device_options.keys())

    logging.debug('tune_kernel called')
    logging.debug('kernel_options: %s', util.get_config_string(kernel_options))
    logging.debug('tuning_options: %s', util.get_config_string(tuning_options))
    logging.debug('device_options: %s', util.get_config_string(device_options))

    if not strategy:
        # no strategy selected, fall back to brute force
        strategy = brute_force
    else:
        if strategy not in strategy_map:
            raise ValueError("Strategy %s not recognized" % strategy)
        # from here on 'strategy' is the strategy object itself, while
        # tuning_options.strategy still holds what the user passed in
        strategy = strategy_map[strategy]

        if not tuning_options.strategy_options:
            # no strategy_options have been passed, use an empty Options object
            tuning_options.strategy_options = Options({})
        else:
            # make sure strategy_options is an Options object
            if not isinstance(strategy_options, Options):
                tuning_options.strategy_options = Options(strategy_options)

            # "fraction" only makes sense for the random_sample strategy
            uses_fraction = "fraction" in tuning_options.strategy_options
            if uses_fraction and tuning_options.strategy != 'random_sample':
                raise ValueError('It is not possible to use fraction in combination with strategies other than "random_sample". '
                                 'Please set strategy="random_sample", when using "fraction" in strategy_options')

            # the requested method must be supported by the selected strategy
            if "method" in tuning_options.strategy_options:
                method = tuning_options.strategy_options.method
                if method not in strategy.supported_methods:
                    raise ValueError('Method %s is not supported for strategy %s' % (method, tuning_options.strategy))

    # pick the runner class for this job
    if simulation_mode:
        runner_class = SimulationRunner
    else:
        runner_class = SequentialRunner

    with runner_class(kernel_source, kernel_options, device_options, iterations, observers) as runner:

        # the user-specified verify function may or may not accept atol;
        # normalize it so that it always does
        tuning_options.verify = util.normalize_verify_function(tuning_options.verify)

        # set up the cache
        if not cache:
            tuning_options.cache = {}
            tuning_options.cachefile = None
        else:
            if not cache.endswith(".json"):
                cache += ".json"
            util.process_cache(cache, kernel_options, tuning_options, runner)

        # let the strategy execute the tuning process
        results, env = strategy.tune(runner, kernel_options, device_options, tuning_options)

        # report the best configuration unless output is suppressed
        if not device_options.quiet:
            if not results:
                print("no results to report")
            else:
                best_config = min(results, key=lambda config: config['time'])
                units = getattr(runner, "units", None)
                print("best performing configuration:")
                util.print_config_output(tune_params, best_config, device_options.quiet, metrics, units)

        if cache:
            util.close_cache(cache)

    return results, env
def run(self, parameter_space, kernel_options, tuning_options):
    """ Iterate through the entire parameter space using a single Python process

    Configurations that are present in ``tuning_options.cache`` are taken from
    the cache without touching the device. The first configuration that does
    need to be benchmarked is run one extra time beforehand, ignoring the
    result, in an attempt to warm up the GPU.

    Configurations for which the benchmark returns None are stored in the
    cache with a time of 1e20, but are not included in the returned results.

    :param parameter_space: The parameter space as an iterable.
    :type parameter_space: iterable

    :param kernel_options: A dictionary with all options for the kernel.
    :type kernel_options: kernel_tuner.interface.Options

    :param tuning_options: A dictionary with all options regarding the tuning
        process.
    :type tuning_options: kernel_tuner.interface.Options

    :returns: A list of dictionaries for executed kernel configurations and their
        execution times. And a dictionary that contains information
        about the hardware/software environment on which the tuning took place.
    :rtype: list(dict()), dict()

    """
    logging.debug('sequential runner started for %s', kernel_options.kernel_name)

    results = []

    #iterate over parameter space
    for element in parameter_space:

        #check if element is in the cache, this is done before the warmup so
        #that no benchmark is wasted on a configuration that is already known
        x_int = ",".join(str(i) for i in element)
        if tuning_options.cache and x_int in tuning_options.cache:
            results.append(tuning_options.cache[x_int])
            continue

        params = OrderedDict(zip(tuning_options.tune_params.keys(), element))

        #attempt to warmup the GPU by running the first config that has to be benchmarked and ignoring the result
        if not self.warmed_up:
            self.dev.compile_and_benchmark(self.kernel_source, self.gpu_args, params, kernel_options, tuning_options)
            self.warmed_up = True

        result = self.dev.compile_and_benchmark(self.kernel_source, self.gpu_args, params, kernel_options, tuning_options)
        if result is None:
            logging.debug('received benchmark result is None, kernel configuration was skipped silently due to compile or runtime failure')
            #remember the failure in the cache, but do not report it as a result
            params["time"] = 1e20
            store_cache(x_int, params, tuning_options)
            continue

        #the benchmark returns either a dict of measurements or only the time
        if isinstance(result, dict):
            #insert time first, so it directly follows the tunable parameters
            params["time"] = result["time"]
            params.update(result)
        else:
            params["time"] = result

        if tuning_options.metrics:
            params = process_metrics(params, tuning_options.metrics)

        #print and append to results
        print_config_output(tuning_options.tune_params, params, self.quiet, tuning_options.metrics, self.units)

        store_cache(x_int, params, tuning_options)
        results.append(params)

    return results, self.dev.get_environment()