Example #1
0
 def generate(tc: str, entry_point: str,
              *inputs: torch.Tensor) -> MappingOptions:
     """Return the best cached MappingOptions for this TC def and input sizes.

     Loads the single top entry from the options cache backed by the
     module-level ``cache_filename``; falls back to the 'naive' options
     when the cache holds no entry for these inputs.
     """
     candidates = MappingOptionsCache(cache_filename).load(
         tc, entry_point, inputs, 1)
     if candidates:
         return candidates[0]
     return MappingOptions('naive')
Example #2
0
    def compileOrTune(self,
                      name="",
                      force_reinforcement_tuning=False,
                      inputs=()):
        """Ensure a compiled kernel exists for ``(name, inputs)``, tuning first if needed.

        On a compilation-cache miss, the best known MappingOptions are
        loaded from ``self.tuner_cache_file`` and used directly — unless
        ``force_reinforcement_tuning`` is set or nothing was loaded, in
        which case a tuning run is started (seeded with the cached best
        when one exists, default MappingOptions otherwise). The winning
        options are then compiled into ``self.compilation_cache``.

        :param name: name of the TC def inside ``self.tc`` to compile.
        :param force_reinforcement_tuning: when True, always re-tune even
            if the options cache already holds an entry for these inputs.
        :param inputs: tuple of input tensors; their sizes and strides key
            both the compilation cache and the options cache.
        """
        if self.debug:
            print(
                "On Tc: {}\ncompile def {}, force_reinforcement_tuning {}, inputs: {}"
                .format(
                    self.tc, name, force_reinforcement_tuning,
                    "".join("{}/{}, ".format(t.size().__str__(),
                                             t.stride().__str__())
                            for t in inputs)))

        # Fast path: already compiled for these input sizes — nothing to do.
        if not self.compilation_cache.is_compiled(name, inputs):
            cache = MappingOptionsCache(self.tuner_cache_file)
            mapping_options = None
            # Load at most 1 (the best) cached options entry for these inputs.
            base_options_list = cache.load(self.tc, name, inputs, 1)
            if len(base_options_list) > 0 and not force_reinforcement_tuning:
                mapping_options = base_options_list[0]
                if self.debug:
                    print("Found best options in {}:\n{}".format(
                        self.tuner_cache_file, mapping_options))
            else:
                # Tuning path: either nothing cached, or re-tuning was forced.
                # NOTE(review): this banner mentions force_reinforcement_tuning
                # even on a plain cache miss (force may be False here), and the
                # "(abort it with Ctrl+C ..." message never closes its
                # parenthesis before ".\n" — both are pre-existing message
                # quirks, left untouched.
                if self.debug:
                    print(
                        "########################################################"
                        "########################################################"
                    )
                    print(
                        "force_reinforcement_tuning = {} was specified, {} options loaded from "
                        "{}".format(force_reinforcement_tuning,
                                    len(base_options_list),
                                    self.tuner_cache_file))
                    print(
                        "Starting a tuning run (abort it with Ctrl+C when "
                        "performance is satisfactory.\nYou can always reinforce "
                        "the results later by passing a proper tuner cache file "
                        "and specifying force_reinforcement_tuning=True)")
                    print(
                        "########################################################"
                        "########################################################"
                    )

                # Seed the tuner with the cached best when available,
                # otherwise start from default MappingOptions.
                if len(base_options_list) == 0:
                    mapping_options = MappingOptions()
                else:
                    mapping_options = base_options_list[0]

                # Tuner appends its results to self.tuner_cache_file.
                tuner = Tuner(self.tc, self.tuner_cache_file)
                mapping_options = tuner.tune(name, inputs, mapping_options,
                                             self.tuner_config)

            self.compilation_cache.compile(name, inputs, mapping_options)
Example #3
0
def autotune(tc: str,
             entry_point: str,
             *inputs: torch.Tensor,
             starting_options: Optional[Union[str, MappingOptions]] = None,
             tuner_config: Optional[TunerConfig] = TunerConfig(),
             cache_filename: Optional[str] = None,
             load_from_cache: Optional[bool] = False,
             store_to_cache: Optional[bool] = False) -> MappingOptions:
    r"""Tunes the defined TC function for given inputs.

        The MappingOptions from which tuning starts is either passed explicitly via
        :code:`starting_options` or loaded from a cache file (when both
        :code:`cache_filename` and :code:`load_from_cache` are properly
        specified). Exactly one of :code:`starting_options` and
        :code:`load_from_cache` must be specified.

        It is possible to obtain a reinforcement tuning behavior by tuning over
        multiple executions and specifying both :code:`load_from_cache` and
        :code:`store_to_cache`. It is recommended to only use a single cache
        file for all TC defs and reinforce it over time.

        An example of usage is provided with :func:`autotune_and_compile`.

        :param tc: a string containing one of more TC defs.
        :param entry_point: the name of the TC def to compile and execute.
        :param inputs: PyTorch Tensors that TC should tune for. The inputs must be
            passed in the order they are also passed in the definition of
            the TC function.
        :param starting_options: :class:`~tclib.MappingOptions` from which tuning should start.
        :param tuner_config: :class:`~tclib.TunerConfig` to control the behavior of the autotuner.
        :param cache_filename: path of the backing cache file; required when
            either :code:`load_from_cache` or :code:`store_to_cache` is set,
            and meaningless otherwise.
        :param load_from_cache: Get the starting :class:`~tclib.MappingOptions` by loading from
            :code:`cache_filename`. If loading fails to recover an entry
            from the cache file for the given input sizes an assertion error
            will trigger.
        :param store_to_cache: Optionally store the best result by appending it to
            the backing cache file.

        Returns:
            The best options found during this tuning run.
    """
    # NOTE: the TunerConfig() default above is evaluated once at def time and
    # shared across calls; it is treated as read-only configuration here.

    # Validate the (cache_filename, load_from_cache, store_to_cache,
    # starting_options) combination before doing any work.
    if cache_filename is not None:
        assert load_from_cache or store_to_cache, (
            "cache_filename specified, "
            "must also specify load_from_cache or store_to_cache")
    if load_from_cache or store_to_cache:
        assert cache_filename is not None, (
            "load_from_cache or store_to_cache"
            " specified, must also specify cache_filename")
    assert starting_options is not None or load_from_cache, (
        "Must specify either starting_options or load_from_cache, choose one!")
    assert starting_options is None or not load_from_cache, (
        "Cannot specify both starting_options and load_from_cache, choose one!"
    )

    # Resolve the MappingOptions tuning starts from: either the best cached
    # entry for these input sizes, or the explicitly provided options
    # (a string such as 'naive' is promoted to MappingOptions).
    base_options = None
    if load_from_cache:
        cache = MappingOptionsCache(cache_filename)
        loaded = cache.load(tc, entry_point, inputs, 1)
        assert len(loaded) > 0, (
            "Could not load from cache for TC {} and sizes {}".format(
                entry_point, "".join(str(i.size()) + " " for i in inputs)))
        base_options = loaded[0]
    else:
        base_options = (MappingOptions(starting_options) if isinstance(
            starting_options, str) else starting_options)

    # TODO: This is still an implicit store behavior in the C++ API,
    #     make it explicit...
    # Passing "" as the cache file disables storing tuning results.
    tuner = Tuner(tc, cache_filename if store_to_cache else "")
    return tuner.tune(entry_point, inputs, base_options, tuner_config)
# Warm-up: run once without timing so compilation cost is excluded below.
compilation_cache.unchecked_run("matmul", (A, B))

# Time 100 unchecked_run calls with the naive (untuned) options.
time_tc(100, "raw unchecked_run naive options\t",
        lambda name, ins: compilation_cache.unchecked_run(name, ins), "matmul",
        (A, B))

################################################################################
# 3. Short tuning run saving to file then load the best option to create a
#    compilation cache
################################################################################
with tempfile.NamedTemporaryFile() as cache_file:
    # Tune starting from 'naive' options; results are appended to cache_file.
    tuner = Tuner(mm, cache_file.name)
    top1 = tuner.tune("matmul", (A, B), MappingOptions('naive'), tuner_config)
    # The tuner's returned best must match the top entry reloaded from disk.
    cache = MappingOptionsCache(cache_file.name)
    top10 = cache.load(mm, "matmul", (A, B), 10)
    assert top1.__str__() == top10[0].__str__()

    # Compile and run with the new options
    compilation_cache.compile("matmul", (A, B), top1)
    time_tc(100, "raw unchecked_run tuned options\t",
            lambda name, ins: compilation_cache.unchecked_run(name, ins),
            "matmul", (A, B))

################################################################################
# 4. Simple torch.autograd.Function
################################################################################
T = tc.define(
    mm,
    tc.make_autotuned_options_factory(starting_options='naive',
Example #5
0
################################################################################
from tensor_comprehensions.tclib import Tuner
from tensor_comprehensions.tclib import MappingOptionsCache
from tensor_comprehensions.tclib import TunerConfig

# Use a unique throwaway cache file per run; reusing a fixed path instead
# would reinforce earlier tuning results across runs.
import uuid
unique_filename = "/tmp/" + str(uuid.uuid4())
print("Tune with cache @", unique_filename)
print("Note that if you pass a fixed filename, you can reinforce an " +
      "existing tuning state")

# Tune from default MappingOptions; results are appended to unique_filename.
tuner = Tuner(mm, unique_filename)
top1 = tuner.tune(
    "matmul", (mat1, mat2), MappingOptions(),
    TunerConfig(threads=8, pop_size=25, generations=3, devices="0"))
# The tuner's returned best must match the top entry reloaded from disk.
cache = MappingOptionsCache(unique_filename)
top10 = cache.load(mm, "matmul", (mat1, mat2), 10)
assert top1.__str__() == top10[0].__str__()

# Compile and run with the new options
compilation_cache.compile("matmul", (mat1, mat2), top1)
time_tc(100, "raw unchecked_run tuned options\t",
        lambda name, ins: compilation_cache.unchecked_run(name, ins, ()),
        "matmul", (mat1, mat2))


################################################################################
# 4. Simple TC builder
################################################################################
class TcBuilder():
    def __init__(self,