Example #1
def main():
    """Main function."""
    log_file = sys.argv[1]
    target = sys.argv[2]
    new_log_file = 'replay_{0}'.format(log_file)
    log_callback = log_to_file(new_log_file)

    GLOBAL_SCOPE.in_tuning = True  # Mark that a measurement session is in progress.
    measure_option = autotvm.measure_option(builder=LocalBuilder(),
                                            runner=LocalRunner(
                                                timeout=10,
                                                number=5,
                                                repeat=3,
                                                min_repeat_ms=1000))
    measure_batch = None

    for batch in tqdm(batch_loader(log_file, target)):
        # Lazily create the measurement function from the first batch's task.
        if measure_batch is None:
            measure_batch = create_measure_batch(batch[0].task, measure_option)

        results = measure_batch(batch)
        # Log callbacks take (tuner, inputs, results); there is no tuner here, so pass None.
        log_callback(None, batch, results)

    GLOBAL_SCOPE.in_tuning = False
    if measure_batch is not None:
        del measure_batch
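
Example #1 is an excerpt and omits its imports. A minimal preamble that would make it self-contained, assuming the standard TVM AutoTVM module layout (batch_loader is a project-specific helper not shown here), might look like:

# Assumed preamble for Example #1 (not part of the original excerpt).
import sys

from tqdm import tqdm

from tvm import autotvm
from tvm.autotvm.env import GLOBAL_SCOPE
from tvm.autotvm.measure import LocalBuilder, LocalRunner, create_measure_batch
from tvm.autotvm.tuner.callback import log_to_file

# batch_loader(log_file, target) is a project-specific helper that yields batches
# of MeasureInput records from the log; its definition is not part of the excerpt.
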
Example #2
    def tune(self, n_trial, measure_option, early_stopping=None, callbacks=(), si_prefix="G"):
        """
        GADQNTuner requires custom tuning pipeline as it requires partial measurement of genes
        after crossover, before mutation.

        DISCLAIMER: In order to customise the tuning pipeline we had to reimplement the tune
                    function. This method is mostly taken from Tuner with the exception of
                    an implementation of a custom tuning pipeline.
        """
        measure_batch = create_measure_batch(self.task, measure_option)
        n_parallel = getattr(measure_batch, "n_parallel", 1)
        early_stopping = early_stopping or 1e9
        format_si_prefix(0, si_prefix)  # Validate the si_prefix argument early.
        GLOBAL_SCOPE.in_tuning = True
        do_crossover = True

        self.mutation_agent, self.crossover_agent = self.create_rl_agents(
            self.discount, int(ceil(n_trial / 2)), self.hidden_sizes, self.learning_rate)

        while self.step_count < n_trial:
            if not self.has_next():
                break

            # Initialise a random population.
            if self.step_count < self.pop_size:
                for _ in range(self.pop_size):
                    gene = point2knob(np.random.randint(len(self.space)), self.dims)
                    while knob2point(gene, self.dims) in self.visited:
                        gene = point2knob(np.random.randint(len(self.space)), self.dims)
                    transition = Transition(None, None, None, gene)
                    self.population.append(transition)
                    self.visited.add(knob2point(gene, self.dims))
                self.measure_configs(self.population, n_parallel, measure_batch, callbacks)
                self.initial_score = np.mean([p.score for p in self.population])
                self.reserve_elites()

            # Apply GA-DQN tuning once initial population has been created.
            else:
                if do_crossover:
                    self.population.extend(self.elite_population)
                    self.reserve_elites()
                    self.crossover_update(n_parallel, measure_batch, callbacks)
                    do_crossover = False
                else:
                    self.mutate_update(n_parallel, measure_batch, callbacks)
                    do_crossover = True

            self.ttl = min(early_stopping + self.best_iter, n_trial) - self.step_count

            if self.step_count >= self.best_iter + early_stopping:
                logger.debug("Early stopped. Best iter: %d.", self.best_iter)
                break

        GLOBAL_SCOPE.in_tuning = False
        del measure_batch
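
Both examples above set GLOBAL_SCOPE.in_tuning around measurement and delete measure_batch afterwards; if a measurement raises, the flag is left set. A defensive variant of the same pattern, shown as a sketch rather than code from either example:

from tvm.autotvm.env import GLOBAL_SCOPE
from tvm.autotvm.measure import create_measure_batch


def measure_with_tuning_scope(task, measure_option, inputs):
    """Illustrative helper: run one measurement batch, always restoring the flag."""
    measure_batch = create_measure_batch(task, measure_option)
    GLOBAL_SCOPE.in_tuning = True
    try:
        return measure_batch(inputs)
    finally:
        GLOBAL_SCOPE.in_tuning = False
        # Deleting the batch function tears down the builder/runner, as above.
        del measure_batch
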
Example #3
def run_one_wkl(wkl, new_log_path, inputs):
    task = wkl.to_task()

    # Re-tune the best configs.
    log_writer = log_to_file(new_log_path)
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=5, repeat=1, min_repeat_ms=1000))
    measure_batch = create_measure_batch(task, measure_option)
    results = measure_batch(inputs)
    log_writer(None, inputs, results)

    del measure_batch
    return
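
A hypothetical caller for run_one_wkl: wkl is assumed to be a workload wrapper exposing to_task(), and the inputs can be recovered from an earlier log with autotvm.record.load_from_file, which yields (MeasureInput, MeasureResult) pairs:

from tvm import autotvm

# Hypothetical driver; wkl and old_log_path come from the surrounding project.
inputs = [inp for inp, _ in autotvm.record.load_from_file(old_log_path)]
run_one_wkl(wkl, 'replay_best.log', inputs)
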
Example #4
def tune_kernels(tasks,
                 gen_graph_tuner_candidates,
                 measure_top_n,
                 measure_option,
                 tuner='random',
                 early_stopping=None,
                 n_trial=5000,
                 log_filename='tuning.log'):
    """Tune kernels with the ranking model."""

    remeasure_option = None
    if tuner == 'round':
        # Set up another measure option for the final remeasurement.
        remeasure_option = autotvm.measure_option(
            builder=LocalBuilder(),
            runner=measure_option['runner'].local_runner,
        )
        assert isinstance(measure_option['runner'], RankModelRunner)

    best_results = []

    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        callbacks = []
        if task.name in [
                'dense_small_batch.cuda', 'conv2d_cudnn.cuda',
                'dense_cublas.cuda', 'dense_large_batch.cuda',
                'conv2d_transpose_nchw.cuda', 'dense_tensorcore.cuda'
        ]:
            # Ignore these six tasks.
            continue
        if task.name not in measure_option['runner'].models:
            print('%s %s not covered by cost models' % (prefix, task.name))
            continue

        # create tuner
        if tuner == 'round':
            tuner_obj = RoundTuner(task, n_cfg=measure_top_n)
            callbacks = [rank_progress(n_trial, prefix=prefix)]  # Use different callbacks.
        else:
            if tuner in ('xgb', 'xgb-rank'):
                tuner_obj = XGBTuner(task, loss_type='rank')
            elif tuner == 'ga':
                tuner_obj = GATuner(task, pop_size=50)
            elif tuner == 'random':
                tuner_obj = RandomTuner(task)
            elif tuner == 'gridsearch':
                tuner_obj = GridSearchTuner(task)
            else:
                raise ValueError("Invalid tuner: " + tuner)

            callbacks = [
                autotvm.callback.progress_bar(n_trial, prefix=prefix),
                autotvm.callback.log_to_file(log_filename)
            ]

        tic = time.time()

        # do tuning
        tuner_obj.tune(n_trial=n_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=callbacks)

        # Round tuner needs an extra measurement step to get the real throughputs.
        if tuner == 'round':
            max_n_layout = 20 if gen_graph_tuner_candidates else 1
            top_cfgs = tuner_obj.get_top_rank_cfgs(max_n_layout)
            measure_batch = create_measure_batch(task, remeasure_option)
            inputs = [
                MeasureInput(task.target, task, config) for config in top_cfgs
            ]
            sys.stderr.write('{} Measure Top {} Configs'.format(
                prefix, len(inputs)))
            results = measure_batch(inputs)
            best_idx, best_flops = max(
                [(idx, i.task.flop / np.mean(r.costs) /
                  1e9 if r.error_no == 0 else 0)
                 for idx, (i, r) in enumerate(zip(inputs, results))],
                key=lambda x: x[1])
            best_results.append((task.workload, best_idx, best_flops))
            sys.stderr.write(' | Best %.2f GFLOPS at Top %d | %.2fs\n' %
                             (best_flops, best_idx, time.time() - tic))
            autotvm.callback.log_to_file(log_filename)(None, inputs, results)
    return best_results
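
The best_flops expression above computes achieved GFLOPS as task.flop divided by the mean measured cost, scoring failed runs as 0. The same computation as a standalone helper, a sketch using only fields the example already reads:

import numpy as np


def measured_gflops(measure_input, measure_result):
    """GFLOPS of one measurement, or 0.0 if the run errored (illustrative helper)."""
    if measure_result.error_no != 0:
        return 0.0
    # task.flop is the total floating-point work; costs holds per-run times in seconds.
    return measure_input.task.flop / np.mean(measure_result.costs) / 1e9
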
Example #5
def tune_kernels(
    tasks,
    measure_top_n,
    measure_option,
    tuner="random",
    early_stopping=None,
    n_trial=5000,
    log_filename="tuning.log",
):
    """Tune kernels with the ranking model."""

    remeasure_option = None
    if tuner == "round":
        # Set up another measure option for the final remeasurement.
        remeasure_option = autotvm.measure_option(
            builder=LocalBuilder(),
            runner=measure_option["runner"].local_runner,
        )
        assert isinstance(measure_option["runner"], RankModelRunner)

    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        callbacks = []
        if task.name not in measure_option["runner"].models:
            print("%s %s not covered by cost models" % (prefix, task.name))
            continue

        # create tuner
        if tuner == "round":
            tuner_obj = RoundTuner(task, n_cfg=measure_top_n)
            callbacks = [rank_progress(n_trial, prefix=prefix)]  # Use different callbacks.
        else:
            if tuner in ("xgb", "xgb-rank"):
                tuner_obj = XGBTuner(task, loss_type="rank")
            elif tuner == "ga":
                tuner_obj = GATuner(task, pop_size=50)
            elif tuner == "random":
                tuner_obj = RandomTuner(task)
            elif tuner == "gridsearch":
                tuner_obj = GridSearchTuner(task)
            else:
                raise ValueError("Invalid tuner: " + tuner)

            callbacks = [
                autotvm.callback.progress_bar(n_trial, prefix=prefix),
                autotvm.callback.log_to_file(log_filename),
            ]

        tic = time.time()

        # do tuning
        tuner_obj.tune(
            n_trial=n_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=callbacks,
        )

        # Round tuner needs an extra measurement step to get the real throughputs.
        if tuner == "round":
            top_cfgs = tuner_obj.get_top_rank_cfgs(1)
            measure_batch = create_measure_batch(task, remeasure_option)
            inputs = [
                MeasureInput(task.target, task, config) for config in top_cfgs
            ]
            sys.stderr.write("{} Measure Top {} Configs".format(
                prefix, len(inputs)))
            results = measure_batch(inputs)

            best_idx, best_flops = max(
                [(idx, i.task.flop / np.mean(r.costs) /
                  1e9 if r.error_no == 0 else 0)
                 for idx, (i, r) in enumerate(zip(inputs, results))],
                key=lambda x: x[1],
            )

            sys.stderr.write(" | Best %.2f GFLOPS at Top %d | %.2fs\n" %
                             (best_flops, best_idx, time.time() - tic))
            autotvm.callback.log_to_file(log_filename)(None, inputs, results)
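
Across all five examples the create_measure_batch workflow is identical: build a measure option, create the batch function for a task, measure a list of MeasureInput records, log the results, and delete the batch function. A condensed sketch of that shared pattern (option values and the log path are placeholders):

from tvm import autotvm
from tvm.autotvm.measure import MeasureInput, create_measure_batch


def remeasure_configs(task, configs, log_path='remeasured.log'):
    """Illustrative end-to-end remeasurement of a list of configs for one task."""
    option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=5, repeat=3, min_repeat_ms=1000))
    measure_batch = create_measure_batch(task, option)
    inputs = [MeasureInput(task.target, task, cfg) for cfg in configs]
    results = measure_batch(inputs)
    autotvm.callback.log_to_file(log_path)(None, inputs, results)
    del measure_batch  # Tear down builder/runner resources.
    return results
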