コード例 #1
0
def tune_kernel(kernel_name, kernel_string, problem_size, arguments, tune_params,
                grid_div_x=None, grid_div_y=None, grid_div_z=None, restrictions=None,
                answer=None, atol=1e-6, verify=None, verbose=False, lang=None,
                device=0, platform=0, smem_args=None, cmem_args=None, texmem_args=None,
                compiler=None, compiler_options=None, log=None, iterations=7,
                block_size_names=None, quiet=False, strategy=None, strategy_options=None,
                cache=None, metrics=None, simulation_mode=False, observers=None):
    """Tune a kernel and return the benchmark results plus environment info.

    The function validates the user input, sorts all arguments into kernel,
    tuning and device option groups, resolves the search strategy (falling
    back to ``brute_force`` when none is given), and then lets the strategy
    drive a runner over the search space.

    :returns: The ``(results, env)`` pair produced by ``strategy.tune``.

    :raises ValueError: If ``iterations`` is smaller than one, if ``strategy``
        is not a key of ``strategy_map``, if ``"fraction"`` is passed in
        ``strategy_options`` with a strategy other than ``"random_sample"``,
        or if the requested ``"method"`` is not listed in the strategy's
        ``supported_methods``.
    """
    if log:
        log_filename = kernel_name + datetime.now().strftime('%Y%m%d-%H:%M:%S') + '.log'
        logging.basicConfig(filename=log_filename, level=log)

    kernel_source = core.KernelSource(kernel_string, lang)

    # validate the user input: argument consistency, forbidden tunable
    # parameter names, and correct use of block_size_names
    _check_user_input(kernel_name, kernel_source, arguments, block_size_names)
    util.check_tune_params_list(tune_params)
    util.check_block_size_params_names_list(block_size_names, tune_params)

    if iterations < 1:
        raise ValueError("Iterations should be at least one!")

    # capture the arguments here and distribute them over three option groups;
    # the option values are looked up by name in this capture
    opts = locals()
    kernel_options = Options([(key, opts[key]) for key in _kernel_options.keys()])
    tuning_options = Options([(key, opts[key]) for key in _tuning_options.keys()])
    device_options = Options([(key, opts[key]) for key in _device_options.keys()])

    logging.debug('tune_kernel called')
    for label, group in (('kernel_options', kernel_options),
                         ('tuning_options', tuning_options),
                         ('device_options', device_options)):
        logging.debug(label + ': %s', util.get_config_string(group))

    if not strategy:
        # no strategy requested by the user
        strategy = brute_force
    else:
        if strategy not in strategy_map:
            raise ValueError("Strategy %s not recognized" % strategy)
        strategy = strategy_map[strategy]

        if not tuning_options.strategy_options:
            # nothing was passed, continue with an empty Options object
            tuning_options.strategy_options = Options({})
        else:
            # make sure the strategy options are stored as an Options object
            if not isinstance(strategy_options, Options):
                tuning_options.strategy_options = Options(strategy_options)

            # "fraction" only makes sense for the random_sample strategy;
            # tuning_options.strategy still holds the name the user passed
            uses_fraction = "fraction" in tuning_options.strategy_options
            if uses_fraction and tuning_options.strategy != 'random_sample':
                raise ValueError('It is not possible to use fraction in combination with strategies other than "random_sample". '
                                 'Please set strategy="random_sample", when using "fraction" in strategy_options')

            # the requested method has to be one the strategy supports
            if "method" in tuning_options.strategy_options:
                method = tuning_options.strategy_options.method
                if method not in strategy.supported_methods:
                    raise ValueError('Method %s is not supported for strategy %s' % (method, tuning_options.strategy))

    # pick the runner implementation for this job
    runner_class = SimulationRunner if simulation_mode else SequentialRunner
    with runner_class(kernel_source, kernel_options, device_options, iterations, observers) as runner:

        # the user-supplied verify function may or may not take atol;
        # normalize it so that it always does
        tuning_options.verify = util.normalize_verify_function(tuning_options.verify)

        if not cache:
            tuning_options.cache = {}
            tuning_options.cachefile = None
        else:
            # cache files always carry the .json extension
            if cache[-5:] != ".json":
                cache += ".json"
            util.process_cache(cache, kernel_options, tuning_options, runner)

        # hand over to the strategy, which performs the actual tuning
        results, env = strategy.tune(runner, kernel_options, device_options, tuning_options)

        # report the fastest configuration unless output is suppressed
        if not device_options.quiet:
            if not results:
                print("no results to report")
            else:
                best_config = min(results, key=lambda config: config['time'])
                units = getattr(runner, "units", None)
                print("best performing configuration:")
                util.print_config_output(tune_params, best_config, device_options.quiet, metrics, units)

        if cache:
            util.close_cache(cache)

    return results, env
コード例 #2
0
ファイル: sequential.py プロジェクト: vatozZ/kernel_tuner
    def run(self, parameter_space, kernel_options, tuning_options):
        """ Benchmark every configuration in the parameter space, one after another

        Configurations already present in ``tuning_options.cache`` are taken
        from the cache instead of being benchmarked again. Configurations for
        which the benchmark returns None are stored in the cache with a time of
        1e20 and left out of the returned results.

        :param parameter_space: The parameter space as an iterable.
        :type parameter_space: iterable

        :param kernel_options: A dictionary with all options for the kernel.
        :type kernel_options: kernel_tuner.interface.Options

        :param tuning_options: A dictionary with all options regarding the tuning
            process.
        :type tuning_options: kernel_tuner.interface.Options

        :returns: A list of dictionaries for executed kernel configurations and their
            execution times. And a dictionary that contains information
            about the hardware/software environment on which the tuning took place.
        :rtype: list(dict()), dict()

        """
        logging.debug('sequential runner started for ' + kernel_options.kernel_name)

        collected = []

        for config in parameter_space:
            params = OrderedDict(zip(tuning_options.tune_params.keys(), config))

            #warm up the device once using the first configuration; its result is discarded
            if not self.warmed_up:
                self.dev.compile_and_benchmark(self.kernel_source, self.gpu_args, params, kernel_options, tuning_options)
                self.warmed_up = True

            #configurations are identified in the cache by their comma-joined values
            cache_key = ",".join(map(str, config))
            if tuning_options.cache and cache_key in tuning_options.cache:
                collected.append(tuning_options.cache[cache_key])
                continue

            outcome = self.dev.compile_and_benchmark(self.kernel_source, self.gpu_args, params, kernel_options, tuning_options)

            #failed configurations are remembered in the cache, but not reported
            if outcome is None:
                logging.debug('received benchmark result is None, kernel configuration was skipped silently due to compile or runtime failure')
                params["time"] = 1e20
                store_cache(cache_key, params, tuning_options)
                continue

            #a dict result carries the time plus additional fields, otherwise
            #the result is the time itself; 'time' is inserted first either way
            if isinstance(outcome, dict):
                params['time'] = outcome["time"]
                params.update(outcome)
            else:
                params['time'] = outcome

            if tuning_options.metrics:
                params = process_metrics(params, tuning_options.metrics)

            #print, cache, and collect the measured configuration
            print_config_output(tuning_options.tune_params, params, self.quiet, tuning_options.metrics, self.units)
            store_cache(cache_key, params, tuning_options)
            collected.append(params)

        return collected, self.dev.get_environment()