def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True):
    if try_winograd:
        for i in range(len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                input_channel = tsk.workload[1][1]
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception:
                pass

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tuner_obj.tune(n_trial=min(n_trial, len(tsk.config_space)),
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
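
# The snippets in this section all elide the same surrounding setup. Below is
# a minimal usage sketch for the tune_tasks variant above, under stated
# assumptions: `mod`, `params`, and `target` are placeholders for whatever an
# earlier relay frontend import produced, and the LocalBuilder/LocalRunner
# settings mirror those used in later snippets rather than being prescriptive.
import os
from tvm import autotvm, relay
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner

tasks = autotvm.task.extract_from_program(mod["main"], target=target,
                                          params=params,
                                          ops=(relay.op.nn.conv2d,))
measure_option = autotvm.measure_option(
    builder=autotvm.LocalBuilder(timeout=10),
    runner=autotvm.LocalRunner(number=10, repeat=1, min_repeat_ms=1000))
tune_tasks(tasks, measure_option, tuner='xgb', n_trial=1000,
           log_filename='tuning.log')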
def tune_kernels(tasks,
                 measure_option,
                 tuner='gridsearch',
                 early_stopping=None,
                 log_filename='tuning.log'):
    # NB: `target` is assumed to be a module-level global here.
    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # convert conv2d tasks to conv2d_NCHWc tasks
        op_name = tsk.workload[0]
        if op_name == 'conv2d':
            func_create = 'topi_x86_conv2d_NCHWc'
        elif op_name == 'depthwise_conv2d_nchw':
            func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
        else:
            raise ValueError("Tuning {} is not supported on x86".format(op_name))

        task = autotvm.task.create(func_create, args=tsk.args,
                                   target=target, template_key='direct')
        task.workload = tsk.workload

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # do tuning
        n_trial = len(task.config_space)
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(log_filename)])
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True,
               try_spatial_pack_depthwise=False):
    if try_winograd:
        for i in range(len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                input_channel = tsk.workload[1][1]
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception:
                pass

    # if we want to use spatial pack for depthwise convolution
    if try_spatial_pack_depthwise:
        tuner = 'xgb_knob'
        for i in range(len(tasks)):
            if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw':
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'contrib_spatial_pack')
                tasks[i] = tsk

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'xgb_knob':
            tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(
                    autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tuner_obj.tune(n_trial=min(n_trial, len(tsk.config_space)),
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
# depending on the specifics of the model and the target platform.

# Begin by extracting the tasks from the ONNX model.
tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)

# Tune the extracted tasks sequentially.
for i, task in enumerate(tasks):
    prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
    tuner_obj = XGBTuner(task, loss_type="rank")
    tuner_obj.tune(
        n_trial=min(tuning_option["trials"], len(task.config_space)),
        early_stopping=tuning_option["early_stopping"],
        measure_option=tuning_option["measure_option"],
        callbacks=[
            autotvm.callback.progress_bar(tuning_option["trials"], prefix=prefix),
            autotvm.callback.log_to_file(tuning_option["tuning_records"]),
        ],
    )

################################################################################
# The output from this tuning process will look something like this:
#
# .. code-block:: bash
#
#   # [Task  1/24]  Current/Best:   10.71/  21.08 GFLOPS | Progress: (60/1000) | 111.77 s Done.
#   # [Task  1/24]  Current/Best:    9.32/  24.18 GFLOPS | Progress: (192/1000) | 365.02 s Done.
#   # [Task  2/24]  Current/Best:   22.39/ 177.59 GFLOPS | Progress: (960/1000) | 976.17 s Done.
#   # [Task  3/24]  Current/Best:   32.03/ 153.34 GFLOPS | Progress: (800/1000) | 776.84 s Done.
#   # [Task  4/24]  Current/Best:   11.96/ 156.49 GFLOPS | Progress: (960/1000) | 632.26 s Done.
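
# The `tuning_option` dict used by the fragment above is defined elsewhere in
# the tutorial it comes from. For reference, a minimal sketch consistent with
# the `tune` function later in this section; the trial count matches the
# (60/1000)-style progress shown in the log, and the record file name is
# illustrative only.
tuning_option = {
    "tuner": "xgb",
    "trials": 1000,
    "early_stopping": None,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=autotvm.LocalRunner(number=10, repeat=1),
    ),
    "tuning_records": "autotuning-records.json",
}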
def tune_tasks(
    tasks,
    log_file,
    measure_option,
    tuner,
    trials,
    early_stopping=None,
    tuning_records=None,
):
    """Tune a list of tasks and output the history to a log file.

    Parameters
    ----------
    tasks : list
        A list of autotvm.Tasks to tune.
    log_file : str
        A file to output the tuning history, in JSON.
    measure_option : autotvm.measure_option
        Options to build and run a tuning task.
    tuner : str
        Which tuner to use.
    trials : int
        The maximum number of tuning trials to perform.
    early_stopping : int, optional
        The minimum number of tuning trials to perform.
        This will be equal to 'trials' if not specified.
    tuning_records : str, optional
        Path to a file of records produced by a previous tuning run,
        reloaded as history before tuning.
    """
    if not tasks:
        logger.warning("there were no tasks found to be tuned")
        return

    if not early_stopping:
        early_stopping = trials

    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # Create a tuner
        if tuner in ("xgb", "xgb-rank"):
            tuner_obj = XGBTuner(tsk, loss_type="rank")
        elif tuner == "xgb_knob":
            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
        elif tuner == "ga":
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == "random":
            tuner_obj = RandomTuner(tsk)
        elif tuner == "gridsearch":
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise TVMCException("invalid tuner: %s" % tuner)

        # If transfer learning is being used, load the existing results
        if tuning_records and os.path.exists(tuning_records):
            logger.info("loading tuning records from %s", tuning_records)
            start_time = time.time()
            tuner_obj.load_history(autotvm.record.load_from_file(tuning_records))
            logger.info("loaded history in %.2f sec(s)", time.time() - start_time)

        tuner_obj.tune(
            n_trial=min(trials, len(tsk.config_space)),
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(trials, prefix=prefix),
                autotvm.callback.log_to_file(log_file),
            ],
        )
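
# A usage sketch for the TVMC-style tune_tasks above. Passing the same path as
# both `log_file` and `tuning_records` makes the tuner reload its own history
# before continuing (the transfer-learning branch in the code above). The file
# name is a hypothetical example; `tasks` comes from extract_from_program as in
# the earlier sketch.
tune_tasks(
    tasks,
    log_file="records.json",
    measure_option=autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=autotvm.LocalRunner(number=10, repeat=1),
    ),
    tuner="xgb",
    trials=1000,
    tuning_records="records.json",
)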
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=500,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True):
    if try_winograd:
        for i in range(len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                input_channel = tsk.workload[1][1]
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception:
                pass

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    # if os.path.exists(tmp_log_file):
    #     os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(
                    autotvm.record.load_from_file(tmp_log_file))

        # do tuning; note these assignments clobber the n_trial and
        # early_stopping arguments for every task
        n_trial = 1000
        n_trial = min(n_trial, len(tsk.config_space))

        # dump the whole config space of this task to a text file
        res = []
        for j in range(len(tsk.config_space)):
            res.append(tuner_obj.space.get(j + 1))
        with open("res.txt", "w", encoding='utf-8') as f:
            for line in res:
                f.write(str(line) + '\n')
        # save each task's space to its own file
        # import shutil
        # shutil.copyfile('res.txt', 'res' + prefix + '.txt')

        early_stopping = len(tsk.config_space) // 2
        print("n_trial=", n_trial)
        print("early_stopping=", early_stopping)

        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

        # NB: prefix contains '/' and spaces, so these file names may be
        # invalid on most filesystems.
        np.savetxt("feas" + prefix + ".txt", tuner_obj.cost_model.feas,
                   fmt='%s', delimiter=' ')
        np.savetxt("x_train" + prefix + ".txt", tuner_obj.cost_model.x_train,
                   fmt='%s', delimiter=' ')
        np.savetxt("y_train" + prefix + ".txt", tuner_obj.cost_model.y_train,
                   fmt='%s', delimiter=' ')
        # Transfer learning here works across identical operators, i.e. tasks
        # whose config spaces match; does that actually carry over from
        # resnet18 to resnet50?

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True):
    if try_winograd:
        for i in range(len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                input_channel = tsk.workload[1][1]
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception:
                pass

    # skip tuning entirely if a finished log already exists
    if os.path.exists(log_filename):
        return

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            print("***** xgb ******")
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning, retrying after transient failures (e.g. RPC drops)
        done_flag = False
        while not done_flag:
            try:
                tuner_obj.tune(n_trial=min(n_trial, len(tsk.config_space)),
                               early_stopping=early_stopping,
                               measure_option=measure_option,
                               callbacks=[
                                   autotvm.callback.progress_bar(n_trial, prefix=prefix),
                                   autotvm.callback.log_to_file(tmp_log_file)])
                done_flag = True
            except Exception:
                print("\nexception happened... wait 20 seconds.")
                sleep(20)

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def autotvm_tune(func, params, target):
    """
    Parameters
    ----------
    func : relay.expr.Function
    params : dict of str to numpy array
    target : tvm.target.Target
    """
    # Array of autotvm.task.Task
    tasks = autotvm.task.extract_from_program(func, target=target,
                                              params=params,
                                              ops=(relay.op.nn.conv2d,))

    # Check tasks and convert them to NCHWc templates.
    for i in range(len(tasks)):
        op_name = tasks[i].workload[0]
        if op_name == 'conv2d':
            func_create = 'topi_x86_conv2d_NCHWc'
        elif op_name == 'depthwise_conv2d_nchw':
            func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
        else:
            raise ValueError("Tuning {} is not supported on x86".format(op_name))

        print("[Create Task %2d/%2d (%s, %s) ] " %
              (i + 1, len(tasks), tasks[i].name, tasks[i].workload[0]))

        tsk = autotvm.task.create(func_create,
                                  args=tasks[i].args,
                                  target=tasks[i].target,
                                  template_key='direct')
        tsk.workload = tasks[i].workload
        tasks[i] = tsk

    # tuning options
    tuner = 'xgb'
    n_trial = 100
    early_stopping = None
    log_filename = 'tuning.log'
    use_transfer_learning = True
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=10, repeat=1, min_repeat_ms=1000))

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(
                    autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tuner_obj.tune(n_trial=min(n_trial, len(tsk.config_space)),
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True):
    # switch eligible conv2d tasks over to the int8 template
    for i in range(len(tasks)):
        print(tasks[i].args)
        data, kernel, padding, stride, layout, dtype = tasks[i].args
        new_args = (data, kernel, padding, stride, layout, dtype)
        block_factor = 4
        CO, CI, KH, KW = kernel[1]
        if CO % block_factor == 0 and CI % block_factor == 0:
            new_task = autotvm.task.create(tasks[i].name, new_args,
                                           tasks[i].target,
                                           tasks[i].target_host, 'int8')
            tasks[i] = new_task

    # drop tasks already covered by pretuned records
    # (note: _query_inside is a private ApplyHistoryBest API)
    if args.pretuned is not None:
        pretuned_ctx = autotvm.apply_history_best(args.pretuned)
        _tasks = []
        for task in tasks:
            if pretuned_ctx._query_inside(target, task.workload) is None:
                _tasks.append(task)
            else:
                print('Ignoring {}'.format(task))
        tasks = _tasks
        print(tasks)

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(
                    autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tuner_obj.tune(n_trial=min(n_trial, len(tsk.config_space)),
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def tune_tasks(mod, params, option):
    print('Creating tasks ...')
    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=option['target'],
                                              params=params,
                                              ops=(relay.op.nn.conv2d,
                                                   relay.op.nn.dense))
    if option['try_winograd']:
        for i in range(len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                input_channel = tsk.workload[1][1]
                if input_channel >= 64:
                    tasks[i] = tsk
            except Exception as err:
                print(err)

    # create tmp log file
    tmp_log_file = option['log_file']
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        # converting conv2d tasks to conv2d_NCHWc tasks
        # op_name = tsk.workload[0]
        # if op_name == 'conv2d':
        #     func_create = 'topi_x86_conv2d_NCHWc'
        # elif op_name == 'depthwise_conv2d_nchw':
        #     func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
        # else:
        #     func_create = tasks[i].name
        # task = autotvm.task.create(func_create, args=tsk.args,
        #                            target=target, template_key='direct')
        # task.workload = tsk.workload
        # tsk = task

        prefix = "[Task %2d/%2d] (%s)" % (i + 1, len(tasks), tsk.name)

        # create tuner
        tuner = option['tuner']
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if option['use_transfer_learning']:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        n_trial = min(option['n_trial'], len(tsk.config_space))
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=option['early_stopping'],
                       measure_option=option['measure_option'],
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)])

    if os.path.exists(option['log_best_file']):
        os.remove(option['log_best_file'])
    autotvm.record.pick_best(option['log_file'], option['log_best_file'])
def tune_model(mod, params, tune_settings, target, model_name):
    """
    Tune a model for a specified number of trials along with other tune
    settings. Tune settings are specified using a json configuration, as per
    the TVM tools readme.
    """
    early_stopping = tune_settings['early_stopping']
    number = tune_settings["number"]
    save_path = tune_settings["save_path"]
    save_name = tune_settings["save_name"]
    repeat = tune_settings["repeat"]
    debug = tune_settings.get("debug_gadqn") or False
    trials = tune_settings["trials"]
    tuner = tune_settings["tuner"]
    target = tvm.target.Target(target)

    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        target_host="llvm",
        params=params)

    runner = autotvm.LocalRunner(
        number=number,
        repeat=repeat)

    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=runner)

    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # Create a tuner
        if tuner in ("xgb", "xgb-rank"):
            tuner_obj = XGBTuner(tsk, loss_type="rank")
        elif tuner == "xgb_knob":
            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
        elif tuner == "ga":
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == "random":
            tuner_obj = RandomTuner(tsk)
        elif tuner == "gridsearch":
            tuner_obj = GridSearchTuner(tsk)
        elif tuner == "ga-dqn" and debug:
            tuner_obj = GADQNTunerDebug(tsk)
        elif tuner == "ga-dqn":
            tuner_obj = GADQNTuner(tsk)
        else:
            raise ValueError("invalid tuner: %s " % tuner)

        abs_path = Path(save_path + save_name).resolve()
        abs_path.mkdir(exist_ok=True, parents=True)
        abs_path_str = str(abs_path)

        tuner_obj.tune(
            n_trial=min(trials, len(tsk.config_space)),
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(trials, prefix=prefix),
                autotvm.callback.log_to_file(
                    abs_path_str + f"/tuning_record_model={model_name}.json"),
            ],
        )

        # Save debug info for RL tuner only
        if tuner == "ga-dqn" and debug:
            tuner_obj.save_model(save_path,
                                 save_name + f"_model={model_name}_layer={i}")
        del tuner_obj
def tune(mod, params, X_ex):
    number = 10
    repeat = 1
    min_repeat_ms = 0  # since we're tuning on a CPU, this can be set to 0
    timeout = 10  # in seconds

    # create a TVM runner
    runner = autotvm.LocalRunner(
        number=number,
        repeat=repeat,
        timeout=timeout,
        min_repeat_ms=min_repeat_ms,
    )

    # Create a simple structure for holding tuning options. We use an XGBoost
    # algorithm for guiding the search. For a production job, you will want to
    # set the number of trials to be larger than the value of 10 used here.
    # For CPU we recommend 1500, for GPU 3000-4000. The number of trials
    # required can depend on the particular model and processor, so it's worth
    # spending some time evaluating performance across a range of values to
    # find the best balance between tuning time and model optimization.
    # Because running tuning is time intensive we set the number of trials to
    # 10, but do not recommend a value this small. The ``early_stopping``
    # parameter is the minimum number of trials to run before a condition that
    # stops the search early can be applied. The measure option indicates
    # where trial code will be built, and where it will be run. In this case,
    # we're using the ``LocalRunner`` we just created and a ``LocalBuilder``.
    # The ``tuning_records`` option specifies a file to write the tuning data
    # to.
    tuning_option = {
        "tuner": "xgb",
        "trials": 10,
        "early_stopping": 100,
        "measure_option": autotvm.measure_option(
            builder=autotvm.LocalBuilder(build_func="default"), runner=runner
        ),
        "tuning_records": "resnet-50-v2-autotuning.json",
    }

    tasks = autotvm.task.extract_from_program(mod["main"], target=TARGET,
                                              params=params)

    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        tuner_obj = XGBTuner(task, loss_type="rank")
        tuner_obj.tune(
            n_trial=min(tuning_option["trials"], len(task.config_space)),
            early_stopping=tuning_option["early_stopping"],
            measure_option=tuning_option["measure_option"],
            callbacks=[
                autotvm.callback.progress_bar(tuning_option["trials"], prefix=prefix),
                autotvm.callback.log_to_file(tuning_option["tuning_records"]),
            ],
        )

    with autotvm.apply_history_best(tuning_option["tuning_records"]):
        with tvm.transform.PassContext(opt_level=3, config={}):
            lib = relay.build(mod, target=TARGET, params=params)

    dev = tvm.device(str(TARGET), 0)
    optimized_module = graph_executor.GraphModule(lib["default"](dev))

    optimized_module.set_input("input0", X_ex)
    optimized_module.run()  # dry run test

    return optimized_module
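
# A rough way to sanity-check the result of `tune` above: time repeated runs
# of the returned module with the standard-library timeit, as the TVM
# tutorials do. `mod`, `params`, and `X_ex` are assumed to be in scope, as in
# the function itself; the batch sizes here are illustrative.
import timeit
import numpy as np

optimized_module = tune(mod, params, X_ex)
runtimes = timeit.repeat(lambda: optimized_module.run(), number=10, repeat=3)
print("mean runtime: %.4f s (std %.4f) per run"
      % (np.mean(runtimes) / 10, np.std(runtimes) / 10))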
def tune_tasks(tasks,
               only_parse,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename=None,
               use_transfer_learning=True,
               try_nchwc=True,
               try_winograd=False,
               try_spatial_pack_depthwise=False):
    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if not use_transfer_learning:
        if os.path.exists(tmp_log_file):
            os.remove(tmp_log_file)
    else:
        # select actual best logs
        if not os.path.exists(tmp_log_file):
            os.mknod(tmp_log_file)
        autotvm.record.pick_best(tmp_log_file, log_filename)

    if os.path.exists(tmp_log_file):
        # sort out best historic entries
        print("Load historic training logs...")
        best_context = autotvm.task.ApplyHistoryBest(
            autotvm.record.load_from_file(tmp_log_file))
        best_tgtkeys = best_context.best_by_targetkey

    print("Total tasks: %s" % len(tasks))

    if try_nchwc:
        for i in range(len(tasks)):
            # converting conv2d tasks to conv2d_NCHWc tasks
            op_name = tasks[i].workload[0]
            if op_name == 'conv2d':
                func_create = 'topi_x86_conv2d_NCHWc'
            elif op_name == 'depthwise_conv2d_nchw':
                func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
            try:  # try NCHWc template
                tsk = autotvm.task.create(func_create, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'direct')
                tsk.workload = tasks[i].workload
                print("[Override Task %2d/%2d (%s) with NCHWc] {cfg.space: %i -> %i}"
                      % (i + 1, len(tasks), tsk.workload[0],
                         len(tasks[i].config_space), len(tsk.config_space)))
                tasks[i] = tsk
            except Exception:
                pass

    if try_winograd:
        for i in range(0, len(tasks)):
            try:  # try winograd template
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'winograd')
                tasks.append(tsk)
                print("[New Task %2d->%2d (%s) winograd] {cfg.space: %i -> %i}"
                      % (i + 1, len(tasks), tsk.workload[0],
                         len(tasks[i].config_space), len(tsk.config_space)))
            except Exception:
                pass

    # if we want to use spatial pack for depthwise convolution
    if try_spatial_pack_depthwise:
        for i in range(len(tasks)):
            if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw':
                tuner = 'xgb_knob'
                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
                                          tasks[i].target, tasks[i].target_host,
                                          'contrib_spatial_pack')
                tasks.append(tsk)
                print("[New Task %2d->%2d (%s) contrib_spatial_pack] {cfg.space: %i -> %i}"
                      % (i + 1, len(tasks), tsk.workload[0],
                         len(tasks[i].config_space), len(tsk.config_space)))

    for i, tsk in enumerate(tasks):
        tsk_template = ''
        tsk_org_flop = -1
        device_name = target.device_name if target.device_name else "cpu"
        try:
            # compute best historic entry GFLOPS
            tsk_template = best_tgtkeys[(device_name, tsk.workload)][0][2].template_key
            tsk_org_cost = np.mean(best_tgtkeys[(device_name, tsk.workload)][1].costs)
            tsk_org_flop = tsk.flop / tsk_org_cost / 1e9
        except Exception:
            pass

        if tsk_org_flop == -1:
            org_flop_str = "no history"
        else:
            org_flop_str = "%.2f GFLOPS /%s" % (tsk_org_flop, tsk_template)

        prefix = "[Task %2d/%2d %s|%s] (%s) {%s}" % (
            i + 1, len(tasks), tsk.workload[1][:4], tsk.workload[2][:4],
            tsk.workload[0], org_flop_str)

        if only_parse:
            print("%s SKIP tuning" % prefix)
            continue

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'xgb_knob':
            tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(
                    autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tuner_obj.tune(n_trial=min(n_trial, len(tsk.config_space)),
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    if not use_transfer_learning:
        os.remove(tmp_log_file)
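
# Once pick_best has written the final log, the records are consumed at
# compile time. A minimal sketch, assuming `mod`, `params`, `target`, and the
# `log_filename` from above are in scope (older TVM versions use
# relay.build_config instead of tvm.transform.PassContext):
import tvm
from tvm import autotvm, relay

with autotvm.apply_history_best(log_filename):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, params=params)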