def get_options_from_kwargs(name, *inputs, **kwargs):
    # the options can be a two-element list of [forward, backward] options
    # (if training) or a single options object (forward only)
    options = None
    if "options" in kwargs and kwargs["options"] is not None:
        options = kwargs["options"]
        assert "type" in kwargs, "layer type not specified: forward/backward"
        if isinstance(options, list) and len(options) == 2:
            options = options[0] if kwargs["type"] == "forward" else options[1]
        elif "training" in kwargs and kwargs["training"] and kwargs["type"] == "backward":
            logger.warning(
                "Same mapping options will be used to run the backward layer; "
                "pass separate backward mapping options for better performance.")
    elif "cache" in kwargs and kwargs["cache"]:
        options = get_options_from_cache_file(name, *inputs, **kwargs)
    elif "options_cache" in kwargs and kwargs["options_cache"]:
        options_cache = kwargs["options_cache"]
        assert kwargs["type"] is not None, "layer type not specified: forward/backward"
        options = options_cache[kwargs["type"]]
        logger.info("Tuned kernel options found, using those options")
    if options is None:
        options = CudaMappingOptions("naive")
        logger.warning(
            "No mapping options passed; 'naive' mapping options will be used "
            "and will likely have bad performance. See help(your_layer.__call__) "
            "for setting mapping options.")
    if not isinstance(options, CudaMappingOptions):
        options = CudaMappingOptions(options)
    return options
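# Usage sketch (illustrative only, not part of the library): resolving the
# mapping options for the backward pass of a trained layer. Assumes torch,
# CudaMappingOptions and get_options_from_kwargs are in scope and a CUDA
# device is available; the tensors and the "matmul" name are hypothetical.
import torch

mat1, mat2 = torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda()
# A two-element list carries [forward, backward] options; type="backward"
# selects the second entry.
opts = get_options_from_kwargs(
    "matmul", mat1, mat2,
    options=[CudaMappingOptions("mlp"), CudaMappingOptions("naive")],
    type="backward", training=True)
assert isinstance(opts, CudaMappingOptions)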
def get_options_from_kwargs_and_tuner_cache(name, cache_file, options_cache, *inputs, **kwargs):
    options = None
    if "options" in kwargs and kwargs["options"] is not None:
        options = kwargs["options"]
        assert "type" in kwargs, "tuning layer type not specified: forward/backward"
        # if separate forward/backward options were passed, use the one for
        # this pass; otherwise use the same options for both
        if isinstance(options, list) and len(options) == 2:
            options = options[0] if kwargs["type"] == "forward" else options[1]
    elif cache_file and isinstance(kwargs["cache"], str):
        options = get_options_from_cache_file(name, *inputs, **kwargs)
    elif (options_cache and kwargs["type"] in options_cache
            and options_cache[kwargs["type"]] is not None):
        options = options_cache[kwargs["type"]]
        logger.info(
            "Kernel was previously tuned, seeding the current tuning with "
            "those mapping options")
    if options is None:
        options = CudaMappingOptions("naive")
        logger.warning(
            "Using 'naive' mapping options for autotuning. See "
            "help(your_layer.autotune) for how to set mapping options.")
    if not isinstance(options, CudaMappingOptions):
        options = CudaMappingOptions(options)
    return options
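# Sketch of the tuner-cache seeding path (illustrative, not part of the
# library): a previously tuned "forward" result seeds a new tuning run.
# The dict layout mirrors how options_cache is indexed above; every value
# here is hypothetical.
options_cache = {"forward": CudaMappingOptions("mlp"), "backward": None}
inputs = (torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda())
# With no explicit options and an empty cache_file, the cached "forward"
# entry is returned as the starting point for autotuning.
seed = get_options_from_kwargs_and_tuner_cache(
    "matmul", "", options_cache, *inputs, type="forward")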
def tune_and_store(self, tc_name, inputs, mapping_options, cache_file=""):
    options = mapping_options
    if not isinstance(options, CudaMappingOptions):
        options = CudaMappingOptions(options)
    try:
        best_options = self.autotuner.tune(tc_name, inputs, options, cache_file)
        return best_options
    except Exception as e:
        logger.error('Raised exception: {}'.format(e))
        return options
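# Sketch (hypothetical names, not part of the library): `tuner` stands in
# for an instance of the class that defines tune_and_store above, and the
# cache path is illustrative. A plain string such as "naive" is accepted
# because the method normalizes it to CudaMappingOptions before tuning.
mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
best = tuner.tune_and_store(
    "matmul", [mat1, mat2], mapping_options="naive",
    cache_file="/tmp/matmul_100_400_500")
# On a tuning failure the normalized starting options are returned, so the
# caller always gets a usable CudaMappingOptions object back.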
def test_autotuner_start_options_and_run_kernel(self):
    lang = MATMUL_LANG
    matmul = tc.define(lang, name="matmul")
    mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
    options = CudaMappingOptions("mlp")
    best_options = matmul.autotune(
        mat1, mat2, cache=True, options=options, **tc.autotuner_settings)
    out = matmul(mat1, mat2, options=best_options)
def test_manual_options(self):
    lang = MATMUL_LANG
    matmul = tc.define(lang, name="matmul")
    mat1, mat2 = torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda()
    options = CudaMappingOptions("naive")
    out = matmul(mat1, mat2, options=options)
def test_options(self):
    print('\nCreating mapping_options')
    options = CudaMappingOptions("naive")
    options.useSharedMemory(True)
    options.unrollCopyShared(False)
    options.mapToBlocks([256, 8])
    options.mapToThreads([4, 16, 4])
    options.tile([2, 8, 64, 128])
    options.unroll(128)
    options.fixParametersBeforeScheduling(False)
    options.scheduleFusionStrategy("Max")
    options.outerScheduleFusionStrategy("Preserve3Coincident")
    print('Mapping options created successfully')