def main():
    """Replay the measurements recorded in a tuning log.

    Command line arguments:
        sys.argv[1]: path of the tuning log to replay.
        sys.argv[2]: target, passed through to ``batch_loader``.

    Every batch yielded by ``batch_loader`` is measured again with a
    ``LocalBuilder``/``LocalRunner`` measure option, and the new results are
    handed to a ``log_to_file`` callback. The output file sits next to the
    input log and carries the same file name prefixed with ``replay_``.
    """
    import os

    if len(sys.argv) < 3:
        sys.exit('usage: <script> <log_file> <target>')
    log_file = sys.argv[1]
    target = sys.argv[2]

    # Prefix the file name only: prefixing the whole path would turn
    # "logs/a.log" into the unrelated path "replay_logs/a.log".
    log_dir, log_name = os.path.split(log_file)
    new_log_file = os.path.join(log_dir, 'replay_{0}'.format(log_name))
    log_callback = log_to_file(new_log_file)

    # The measure batch is created lazily from the first batch, because the
    # task it is bound to is only known once a batch has been loaded.
    measure_batch = None
    GLOBAL_SCOPE.in_tuning = True
    try:
        measure_option = autotvm.measure_option(
            builder=LocalBuilder(),
            runner=LocalRunner(timeout=10,
                               number=5,
                               repeat=3,
                               min_repeat_ms=1000))

        for batch in tqdm(batch_loader(log_file, target)):
            if measure_batch is None:
                measure_batch = create_measure_batch(batch[0].task,
                                                     measure_option)
            results = measure_batch(batch)
            log_callback(None, batch, results)
    finally:
        # Always leave tuning mode and drop the measure batch, even when a
        # measurement or the log callback raises.
        GLOBAL_SCOPE.in_tuning = False
        del measure_batch
def tune(self, n_trial, measure_option, early_stopping=None, callbacks=(), si_prefix="G"):
    """Run the GA-DQN tuning loop until ``n_trial`` steps or early stopping.

    GADQNTuner requires custom tuning pipeline as it requires partial
    measurement of genes after crossover, before mutation.

    DISCLAIMER: In order to customise the tuning pipeline we had to
    reimplement the tune function. This method is mostly taken from Tuner
    with the exception of an implementation of a custom tuning pipeline.

    Parameters
    ----------
    n_trial
        Upper bound on ``self.step_count``; the loop ends once it is reached.
    measure_option
        Passed to ``create_measure_batch`` together with ``self.task``.
    early_stopping
        The loop stops once ``self.step_count`` is at least
        ``self.best_iter + early_stopping``. A falsy value means 1e9.
    callbacks
        Forwarded unchanged to ``measure_configs``, ``crossover_update`` and
        ``mutate_update``.
    si_prefix
        Only passed to ``format_si_prefix``, whose result is discarded
        (presumably a validity check -- confirm against its definition).
    """
    measure_batch = create_measure_batch(self.task, measure_option)
    n_parallel = getattr(measure_batch, "n_parallel", 1)
    early_stopping = early_stopping or 1e9
    format_si_prefix(0, si_prefix)

    GLOBAL_SCOPE.in_tuning = True
    try:
        do_crossover = True
        self.mutation_agent, self.crossover_agent = self.create_rl_agents(
            self.discount,
            int(ceil(n_trial / 2)),
            self.hidden_sizes,
            self.learning_rate)

        while self.step_count < n_trial:
            if not self.has_next():
                break

            if self.step_count < self.pop_size:
                # Initialise a random population of not-yet-visited genes.
                for _ in range(self.pop_size):
                    gene = point2knob(np.random.randint(len(self.space)), self.dims)
                    while knob2point(gene, self.dims) in self.visited:
                        gene = point2knob(np.random.randint(len(self.space)), self.dims)
                    self.population.append(Transition(None, None, None, gene))
                    self.visited.add(knob2point(gene, self.dims))
                self.measure_configs(self.population, n_parallel, measure_batch, callbacks)
                self.initial_score = np.mean([p.score for p in self.population])
                self.reserve_elites()
            elif do_crossover:
                # Apply GA-DQN tuning once initial population has been
                # created; crossover and mutation steps alternate.
                self.population.extend(self.elite_population)
                self.reserve_elites()
                self.crossover_update(n_parallel, measure_batch, callbacks)
                do_crossover = False
            else:
                self.mutate_update(n_parallel, measure_batch, callbacks)
                do_crossover = True

            self.ttl = min(early_stopping + self.best_iter, n_trial) - self.step_count

            if self.step_count >= self.best_iter + early_stopping:
                logger.debug("Early stopped. Best iter: %d.", self.best_iter)
                break
    finally:
        # Leave tuning mode and drop the measure batch even if an agent
        # update or a measurement raises part-way through.
        GLOBAL_SCOPE.in_tuning = False
        del measure_batch
def run_one_wkl(wkl, new_log_path, inputs):
    """Measure ``inputs`` for one workload and log the results.

    The workload is converted to a task with ``wkl.to_task()``, ``inputs``
    are measured in one batch using ``autotvm.LocalBuilder`` and
    ``autotvm.LocalRunner``, and the results are passed to the
    ``log_to_file`` callback created for ``new_log_path``.

    Returns None.
    """
    tuning_task = wkl.to_task()

    # Re-tune the best configs.
    result_logger = log_to_file(new_log_path)
    local_builder = autotvm.LocalBuilder(timeout=10)
    local_runner = autotvm.LocalRunner(number=5, repeat=1, min_repeat_ms=1000)
    options = autotvm.measure_option(builder=local_builder,
                                     runner=local_runner)

    batch_measurer = create_measure_batch(tuning_task, options)
    result_logger(None, inputs, batch_measurer(inputs))
    del batch_measurer
def tune_kernels(tasks,
                 gen_graph_tuner_candidates,
                 measure_top_n,
                 measure_option,
                 tuner='random',
                 early_stopping=None,
                 n_trial=5000,
                 log_filename='tuning.log'):
    """Tune kernels with the ranking model.

    Parameters
    ----------
    tasks
        Sized sequence of tasks; each needs at least a ``name`` attribute.
    gen_graph_tuner_candidates
        When truthy, the 'round' tuner remeasures its top 20 configs
        instead of only the top 1.
    measure_top_n
        Passed to ``RoundTuner`` as ``n_cfg``.
    measure_option
        Mapping whose ``'runner'`` entry exposes ``models`` and, for the
        'round' tuner, ``local_runner``.
    tuner
        One of 'round', 'xgb', 'xgb-rank', 'ga', 'random', 'gridsearch'.
    early_stopping, n_trial
        Forwarded to the tuner's ``tune`` method.
    log_filename
        File given to ``autotvm.callback.log_to_file``.

    Returns
    -------
    list
        One ``(task.workload, best_idx, best_flops)`` tuple per task that
        the 'round' tuner remeasured; empty for every other tuner.

    Raises
    ------
    ValueError
        If ``tuner`` is not recognised. This is raised when the first task
        that is neither skipped nor uncovered is reached.
    """
    # Tasks with these names are never tuned here. Built once instead of on
    # every loop iteration.
    skipped_task_names = (
        'dense_small_batch.cuda',
        'conv2d_cudnn.cuda',
        'dense_cublas.cuda',
        'dense_large_batch.cuda',
        'conv2d_transpose_nchw.cuda',
        'dense_tensorcore.cuda',
    )

    remeasure_option = None
    if tuner == 'round':
        # Check the runner type before reading its ``local_runner``
        # attribute, so a wrong runner fails on this check.
        assert isinstance(measure_option['runner'], RankModelRunner)
        # Setup another measure option for final remeasurement.
        remeasure_option = autotvm.measure_option(
            builder=LocalBuilder(),
            runner=measure_option['runner'].local_runner,
        )

    best_results = []
    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        if task.name in skipped_task_names:
            continue
        if task.name not in measure_option['runner'].models:
            print('not covered by cost models')
            continue

        # create tuner
        if tuner == 'round':
            tuner_obj = RoundTuner(task, n_cfg=measure_top_n)
            # Use different callbacks.
            callbacks = [rank_progress(n_trial, prefix=prefix)]
        else:
            if tuner in ('xgb', 'xgb-rank'):
                tuner_obj = XGBTuner(task, loss_type='rank')
            elif tuner == 'ga':
                tuner_obj = GATuner(task, pop_size=50)
            elif tuner == 'random':
                tuner_obj = RandomTuner(task)
            elif tuner == 'gridsearch':
                tuner_obj = GridSearchTuner(task)
            else:
                raise ValueError("Invalid tuner: " + tuner)
            callbacks = [
                autotvm.callback.progress_bar(n_trial, prefix=prefix),
                autotvm.callback.log_to_file(log_filename)
            ]

        tic = time.time()

        # do tuning
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=callbacks)

        if tuner != 'round':
            continue

        # Round tuner needs an extra measurement step to get the real
        # throughputs.
        max_n_layout = 20 if gen_graph_tuner_candidates else 1
        top_cfgs = tuner_obj.get_top_rank_cfgs(max_n_layout)
        inputs = [
            MeasureInput(task.target, task, config) for config in top_cfgs
        ]
        sys.stderr.write('{} Measure Top {} Configs'.format(
            prefix, len(inputs)))
        if not inputs:
            # Nothing to measure: max() below would raise on an empty list.
            sys.stderr.write(' | No config to measure\n')
            continue

        measure_batch = create_measure_batch(task, remeasure_option)
        try:
            results = measure_batch(inputs)
        finally:
            # Drop the measure batch as soon as it is no longer needed
            # rather than keeping it alive through the next task's tuning.
            del measure_batch

        # Failed measurements (error_no != 0) count as 0 GFLOPS.
        gflops = [
            inp.task.flop / np.mean(res.costs) / 1e9
            if res.error_no == 0 else 0
            for inp, res in zip(inputs, results)
        ]
        best_idx, best_flops = max(enumerate(gflops),
                                   key=lambda pair: pair[1])
        best_results.append((task.workload, best_idx, best_flops))
        sys.stderr.write(' | Best %.2f GFLOPS at Top %d | %.2fs\n' %
                         (best_flops, best_idx, time.time() - tic))
        autotvm.callback.log_to_file(log_filename)(None, inputs, results)
    return best_results
def tune_kernels(
    tasks,
    measure_top_n,
    measure_option,
    tuner="random",
    early_stopping=None,
    n_trial=5000,
    log_filename="tuning.log",
):
    """Tune kernels with the ranking model.

    Parameters
    ----------
    tasks
        Sized sequence of tasks; each needs at least a ``name`` attribute.
    measure_top_n
        Passed to ``RoundTuner`` as ``n_cfg``.
    measure_option
        Mapping whose ``"runner"`` entry exposes ``models`` and, for the
        "round" tuner, ``local_runner``.
    tuner
        One of "round", "xgb", "xgb-rank", "ga", "random", "gridsearch".
    early_stopping, n_trial
        Forwarded to the tuner's ``tune`` method.
    log_filename
        File given to ``autotvm.callback.log_to_file``.

    Returns None. Progress is written to stderr and results go to the log
    callbacks.

    Raises
    ------
    ValueError
        If ``tuner`` is not recognised. This is raised when the first task
        covered by the cost models is reached.
    """
    remeasure_option = None
    if tuner == "round":
        # Check the runner type before reading its ``local_runner``
        # attribute, so a wrong runner fails on this check.
        assert isinstance(measure_option["runner"], RankModelRunner)
        # Setup another measure option for final remeasurement.
        remeasure_option = autotvm.measure_option(
            builder=LocalBuilder(),
            runner=measure_option["runner"].local_runner,
        )

    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        if task.name not in measure_option["runner"].models:
            print("%s %s not covered by cost models" % (prefix, task.name))
            continue

        # create tuner
        if tuner == "round":
            tuner_obj = RoundTuner(task, n_cfg=measure_top_n)
            # Use different callbacks.
            callbacks = [rank_progress(n_trial, prefix=prefix)]
        else:
            if tuner in ("xgb", "xgb-rank"):
                tuner_obj = XGBTuner(task, loss_type="rank")
            elif tuner == "ga":
                tuner_obj = GATuner(task, pop_size=50)
            elif tuner == "random":
                tuner_obj = RandomTuner(task)
            elif tuner == "gridsearch":
                tuner_obj = GridSearchTuner(task)
            else:
                raise ValueError("Invalid tuner: " + tuner)
            callbacks = [
                autotvm.callback.progress_bar(n_trial, prefix=prefix),
                autotvm.callback.log_to_file(log_filename),
            ]

        tic = time.time()

        # do tuning
        tuner_obj.tune(
            n_trial=n_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=callbacks,
        )

        if tuner != "round":
            continue

        # Round tuner needs an extra measurement step to get the real
        # throughputs.
        top_cfgs = tuner_obj.get_top_rank_cfgs(1)
        inputs = [
            MeasureInput(task.target, task, config) for config in top_cfgs
        ]
        sys.stderr.write("{} Measure Top {} Configs".format(
            prefix, len(inputs)))
        if not inputs:
            # Nothing to measure: max() below would raise on an empty list.
            sys.stderr.write(" | No config to measure\n")
            continue

        measure_batch = create_measure_batch(task, remeasure_option)
        try:
            results = measure_batch(inputs)
        finally:
            # Drop the measure batch as soon as it is no longer needed
            # rather than keeping it alive through the next task's tuning.
            del measure_batch

        # Failed measurements (error_no != 0) count as 0 GFLOPS.
        gflops = [
            inp.task.flop / np.mean(res.costs) / 1e9
            if res.error_no == 0 else 0
            for inp, res in zip(inputs, results)
        ]
        best_idx, best_flops = max(enumerate(gflops),
                                   key=lambda pair: pair[1])
        sys.stderr.write(" | Best %.2f GFLOPS at Top %d | %.2fs\n" %
                         (best_flops, best_idx, time.time() - tic))
        autotvm.callback.log_to_file(log_filename)(None, inputs, results)