# TcAutotuner from the Tensor Comprehensions Python bindings. The imports and
# logger below are assumptions about the surrounding module: ATenAutotuner,
# Options, TcCompilationUnit, and the helpers (get_tensors,
# get_tc_names_from_kwargs, get_tc_hash_key,
# get_options_from_kwargs_and_tuner_cache, make_contiguous, unpack_variables)
# are defined elsewhere in the tensor_comprehensions package.
import logging
import uuid

logger = logging.getLogger(__name__)

class TcAutotuner(object):
    def __init__(self, tc_lang, **kwargs):
        # tuner_cache will look like:
        # hash_key -> {"forward": options1, "backward": options2}
        self.tuner_cache = {}
        self.kwargs = kwargs
        self.tc_lang = tc_lang
        self.autotuner = ATenAutotuner(tc_lang)
        self.set_autotuner_parameters(**kwargs)
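
    # A note on the parameters below (a summary of the evolutionary search
    # the underlying TC autotuner runs; treat the details as assumptions):
    # each of the `generations` rounds evaluates `pop_size` candidate
    # mappings, recombines them according to `crossover_rate` and
    # `mutation_rate` (percentages), and carries the `number_elites` best
    # candidates over unchanged. `gpus` is a comma-separated list of device
    # ids, and `proto` is the file where the tuner serializes its state so a
    # later run can resume via `restore_from_proto`.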

    def set_autotuner_parameters(self,
                                 pop_size=20,
                                 crossover_rate=80,
                                 mutation_rate=7,
                                 generations=10,
                                 number_elites=1,
                                 threads=8,
                                 gpus="0",
                                 proto="/tmp/tuner.txt",
                                 restore_from_proto=False,
                                 restore_number=10,
                                 log_generations=False,
                                 tuner_min_launch_total_threads=64,
                                 **kwargs):
        self.autotuner.pop_size(pop_size)
        self.autotuner.crossover_rate(crossover_rate)
        self.autotuner.mutation_rate(mutation_rate)
        self.autotuner.generations(generations)
        self.autotuner.number_elites(number_elites)
        self.autotuner.threads(threads)
        self.autotuner.gpus(gpus)
        self.autotuner.proto(proto)
        self.autotuner.restore_from_proto(restore_from_proto)
        self.autotuner.restore_number(restore_number)
        self.autotuner.log_generations(log_generations)
        self.autotuner.tuner_min_launch_total_threads(
            tuner_min_launch_total_threads)

    # We need to pass the inputs so that we can load the correct options from
    # the cache, i.e. the ones corresponding to the input sizes. This matters
    # when the cache contains multiple kernels and multiple sizes per kernel.
    def load(self, filename, tc_name, inputs, num_candidates=1):
        best_options = self.autotuner.load(filename, tc_name, inputs,
                                           num_candidates)
        if num_candidates == 1:
            return best_options[0]
        return best_options
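
    # For illustration (a hedged sketch; the path, kernel name, and tensor
    # sizes are hypothetical): fetch the top-3 previously tuned option sets
    # for a "matmul" kernel at these exact input sizes:
    #
    #   candidates = tuner.load("/tmp/matmul_cache", "matmul",
    #                           [torch.randn(32, 64).cuda(),
    #                            torch.randn(64, 128).cuda()],
    #                           num_candidates=3)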

    # If cache_file is not "", the tuning results are saved to that file.
    # On failure, the tuner logs the exception and falls back to returning
    # the seed options rather than raising.
    def tune_and_store(self, tc_name, inputs, mapping_options, cache_file=""):
        options = mapping_options
        if not isinstance(options, Options):
            options = Options(options)
        try:
            best_options = self.autotuner.tune(cache_file, tc_name, inputs,
                                               options, [options])
            return best_options
        except Exception as e:
            logger.error('Raised exception: {}'.format(e))
            return options

    def autotune(self, *inputs, **kwargs):
        input_tensors = get_tensors(list(inputs))
        kwargs.update(self.kwargs)
        name, backward_name = get_tc_names_from_kwargs(**kwargs)
        kwargs.pop("name", None)
        backward = backward_name is not None
        hash_key = get_tc_hash_key(name, *input_tensors)
        # Look up the options in the cache. Whenever autotune is called,
        # tuning must happen; but if the kernel has been tuned earlier, the
        # previous options can seed the new tuning run.
        options_cache = self.tuner_cache.get(hash_key, {})

        # Options the user passed explicitly (via a file or an Options
        # object) take priority over cached ones.
        cache_file = ""
        if "cache" in kwargs and kwargs["cache"]:
            if isinstance(kwargs["cache"], bool):
                hash_key = get_tc_hash_key(name, *input_tensors)
                cache_file = "/tmp/{}_{}".format(hash_key, str(uuid.uuid4()))
            elif isinstance(kwargs["cache"], str):
                cache_file = kwargs["cache"]
            logger.info(
                'Autotuning cache will be saved to: {}.cuda/options'.format(
                    cache_file))
        else:
            logger.warning(
                "Autotuning results won't be cached. 'cache' option is not set"
            )

        # First, autotune the forward layer; its inputs are already given, so
        # we can tune on them directly.
        kwargs["type"] = "forward"
        # Pass this tuner object so options can be loaded from file without
        # creating a separate object.
        kwargs["tuner"] = self.autotuner
        options = get_options_from_kwargs_and_tuner_cache(
            name, cache_file, options_cache, *input_tensors, **kwargs)
        forward_best_options = self.tune_and_store(name,
                                                   input_tensors,
                                                   mapping_options=options,
                                                   cache_file=cache_file)
        # update the cache with the options
        options_cache["forward"] = forward_best_options
        if not backward:
            self.tuner_cache[hash_key] = options_cache
            return forward_best_options

        # Now tune the backward layer. For that, we first run the forward
        # layer and use its outputs as inputs to the backward tuning.
        logger.info('Autotuning the backward layer now')
        cu = TcCompilationUnit()
        cu.define(self.tc_lang)

        if "options" in kwargs:
            orig_options = kwargs["options"]
            kwargs["options"] = forward_best_options
            outputs = cu.compile_and_run(name, input_tensors, **kwargs)
            kwargs["options"] = orig_options
        else:
            outputs = cu.compile_and_run(name,
                                         input_tensors,
                                         options=forward_best_options,
                                         **kwargs)
        # Now that we have the outputs of the forward pass, we have the
        # inputs for the backward layer and can tune it.
        reorder_function = kwargs.get("reorder_function")
        rearranged_outputs = list(outputs)
        if reorder_function is not None:
            rearranged_outputs = reorder_function(list(outputs))
        inputs = make_contiguous(
            unpack_variables(input_tensors + list(rearranged_outputs)))

        if cache_file:
            cache_file = cache_file + "_backward"
            logger.info(
                'Backwards autotuning cache will be saved to: {}.cuda/options'.
                format(cache_file))
        kwargs["type"] = "backward"
        options = get_options_from_kwargs_and_tuner_cache(
            backward_name, cache_file, options_cache, *inputs, **kwargs)
        backward_best_options = self.tune_and_store(backward_name,
                                                    inputs,
                                                    mapping_options=options,
                                                    cache_file=cache_file)
        # update the cache with the options
        options_cache["backward"] = backward_best_options
        self.tuner_cache[hash_key] = options_cache
        return [forward_best_options, backward_best_options]
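
# A minimal end-to-end sketch (assumptions: a CUDA-capable device, and that
# the surrounding tensor_comprehensions module provides the names used in
# the class above; the TC definition, kwargs, and tensor sizes below are
# illustrative, not the library's documented API):
if __name__ == "__main__":
    import torch

    MATMUL_LANG = """
    def matmul(float(M, K) A, float(K, N) B) -> (C) {
        C(m, n) +=! A(m, k) * B(k, n)
    }
    """
    tuner = TcAutotuner(MATMUL_LANG, name="matmul",
                        pop_size=10, generations=2, threads=4)
    A = torch.randn(32, 64).cuda()
    B = torch.randn(64, 128).cuda()
    # cache=True writes the tuned options to a uniquely named file under
    # /tmp; the return value holds the best mapping options found.
    best_options = tuner.autotune(A, B, cache=True, options="naive")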