Example #1
0
class IterationTimer(TrainHook):
    """Training hook that records per-step time and logs overall speed.

    Two timers are used:

    * ``step_timer`` is reset in ``before_step`` and read in ``after_step``
      to publish the duration of a single step as the ``time`` scalar.
    * ``total_timer`` is resumed in ``before_step`` and paused in
      ``after_step`` so that it only accumulates time spent between those
      two calls; ``after_train`` reports the remainder of the wall-clock
      time as time spent "on hooks".

    NOTE(review): ``Timer`` is defined outside this file; the comments here
    assume ``reset``/``pause``/``resume``/``seconds`` behave like the
    fvcore ``Timer`` of the same name -- confirm.
    """

    def __init__(self, max_iter, start_iter, warmup_iter, ignore_warmup_time):
        """Store the timing configuration.

        Args:
            max_iter: Stored on the instance; not read by this hook itself.
            start_iter: Iteration index training starts from; subtracted
                when counting the iterations that were timed.
            warmup_iter: Steps whose ``storage.iter`` is below this value
                are treated as warmup.
            ignore_warmup_time: When true, warmup steps are excluded from
                both the per-step ``time`` scalar and the overall totals.
        """
        self.warmup_iter = warmup_iter
        self.step_timer = Timer()
        self.start_iter = start_iter
        self.max_iter = max_iter
        self.ignore_warmup_time = ignore_warmup_time

    def before_train(self, **kwargs):
        """Start the wall clock and create the (initially paused) total timer."""
        self.start_time = time.perf_counter()
        self.total_timer = Timer()
        # Stay paused until the first step starts so that only time spent
        # between before_step and after_step is accumulated.
        self.total_timer.pause()

    def after_train(self, storage, **kwargs):
        """Log the overall training speed and the total training time.

        Args:
            storage: Object exposing ``iter``; it is read here as the index
                of the last iteration that ran.
        """
        last_iter = storage.iter
        total_time = time.perf_counter() - self.start_time
        total_time_minus_hooks = self.total_timer.seconds()
        hook_time = total_time - total_time_minus_hooks

        # Warmup steps are only dropped from total_timer when
        # ignore_warmup_time is set (see after_step). Subtracting them from
        # the iteration count otherwise would divide the time of *all*
        # steps by a smaller count and overstate the seconds per iteration.
        # NOTE(review): after_step compares the absolute storage.iter with
        # warmup_iter, so when start_iter > 0 fewer than warmup_iter steps
        # may actually have been dropped -- confirm the intended behaviour
        # for resumed runs.
        skipped_iter = self.warmup_iter if self.ignore_warmup_time else 0
        num_iter = last_iter + 1 - self.start_iter - skipped_iter

        if num_iter > 0 and total_time_minus_hooks > 0:
            # Speed is meaningful only after warmup
            # NOTE this format is parsed by grep in some scripts
            logging_rank(
                "Overall training speed: {} iterations in {} ({:.4f} s / it)".
                format(
                    num_iter,
                    str(datetime.timedelta(
                        seconds=int(total_time_minus_hooks))),
                    total_time_minus_hooks / num_iter,
                ))

        logging_rank("Total training time: {} ({} on hooks)".format(
            str(datetime.timedelta(seconds=int(total_time))),
            str(datetime.timedelta(seconds=int(hook_time))),
        ))

    def before_step(self, **kwargs):
        """Restart the per-step timer and resume the total timer."""
        self.step_timer.reset()
        self.total_timer.resume()

    def after_step(self, storage, **kwargs):
        """Publish the step duration, or discard the timing during warmup.

        Args:
            storage: Object exposing ``iter`` and ``put_scalars``.
        """
        in_warmup = (self.ignore_warmup_time
                     and storage.iter < self.warmup_iter)
        if in_warmup:
            # Ignore the warmup cost: restart both the wall clock and the
            # total timer so that totals only cover post-warmup steps.
            self.start_time = time.perf_counter()
            self.total_timer.reset()
        else:
            storage.put_scalars(time=self.step_timer.seconds())

        self.total_timer.pause()
Example #2
0
class TrainingStats(object):
    """Track vital training statistics.

    Keeps a smoothed value for every individual loss and for the total
    loss, times iterations, and periodically hands the collected statistics
    to ``log_stats`` and, if one was supplied, to a tensorboard logger.
    """

    def __init__(self, args, log_period=20, tensorboard_logger=None):
        """Set up timers and smoothed loss trackers.

        Args:
            args: Passed through unchanged to ``log_stats``.
            log_period: Output logging period in SGD iterations.
            tensorboard_logger: Optional object exposing
                ``add_scalar(tag, value, step)``; tensorboard logging is
                skipped when it is falsy.
        """
        self.args = args
        self.log_period = log_period
        self.tblogger = tensorboard_logger
        # Keys that are never forwarded to tensorboard.
        self.tb_ignored_keys = ['iter', 'eta', 'epoch', 'time']
        self.iter_timer = Timer()
        # Window size for smoothing tracked values (with median filtering)
        self.filter_size = 20

        def create_smoothed_value():
            # Reads filter_size at call time, i.e. when a new loss name is
            # first seen.
            return SmoothedValue(self.filter_size)

        self.smoothed_losses = defaultdict(create_smoothed_value)
        self.smoothed_total_loss = SmoothedValue(self.filter_size)

    def IterTic(self):
        """Call ``tic()`` on the iteration timer."""
        self.iter_timer.tic()

    def IterToc(self):
        """Call ``toc(average=False)`` on the iteration timer and return its result."""
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        """Reset the iteration timer."""
        self.iter_timer.reset()

    def UpdateIterStats(self, loss):
        """Update tracked iteration statistics.

        Args:
            loss: Mapping from loss name to a value convertible with
                ``float``. The ``'total_loss'`` entry is required and is
                tracked separately from the individual losses.

        Raises:
            KeyError: If ``loss`` has no ``'total_loss'`` entry.
        """
        for name in loss:
            # Every loss except the total one gets its own tracker.
            if name != 'total_loss':
                self.smoothed_losses[name].AddValue(float(loss[name]))

        self.smoothed_total_loss.AddValue(float(loss['total_loss']))

    def LogIterStats(self, cur_iter, cur_epoch, optimizer, val_err=None):
        """Log the tracked statistics every ``log_period`` iterations.

        Args:
            cur_iter: Current iteration; nothing happens unless it is a
                multiple of ``log_period``.
            cur_epoch: Current epoch, reported as-is.
            optimizer: Object whose ``state_dict()`` exposes
                ``'param_groups'`` entries carrying an ``'lr'`` value.
            val_err: Optional mapping of validation errors; ``None`` is
                treated as empty.
        """
        if cur_iter % self.log_period != 0:
            return

        stats = self.GetStats(cur_iter, cur_epoch, optimizer, val_err)
        log_stats(stats, self.args)
        if self.tblogger:
            self.tb_log_stats(stats, cur_iter)

    def tb_log_stats(self, stats, cur_iter):
        """Log the tracked statistics to tensorboard.

        Nested dicts are walked recursively and their entries are logged
        under their own keys; keys listed in ``tb_ignored_keys`` are
        skipped at every nesting level.

        Args:
            stats: Possibly nested mapping of statistic name to value.
            cur_iter: Step passed to ``add_scalar``.
        """
        for key in stats:
            if key in self.tb_ignored_keys:
                continue
            value = stats[key]
            if isinstance(value, dict):
                self.tb_log_stats(value, cur_iter)
            else:
                self.tblogger.add_scalar(key, value, cur_iter)

    def GetStats(self, cur_iter, cur_epoch, optimizer, val_err=None):
        """Collect the current statistics into an ordered mapping.

        Args:
            cur_iter: Current iteration, used for the ETA estimate.
            cur_epoch: Current epoch, reported as-is.
            optimizer: Object whose ``state_dict()`` exposes
                ``'param_groups'`` entries carrying an ``'lr'`` value.
            val_err: Optional mapping of validation errors; ``None`` is
                treated as empty.

        Returns:
            An ``OrderedDict`` with ``iter``, ``time``, ``eta``,
            ``total_loss`` and ``epoch``, followed by ``lr`` (one
            ``group<i>_lr`` entry per parameter group), one single-entry
            ``OrderedDict`` per tracked loss, and ``val_err``.
        """
        avg_iter_time = self.iter_timer.average_time
        # Remaining iterations are measured against the global
        # cfg.TRAIN.MAX_ITER setting.
        eta_seconds = avg_iter_time * (cfg.TRAIN.MAX_ITER - cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        stats = OrderedDict(
            iter=cur_iter,  # 1-indexed
            time=avg_iter_time,
            eta=eta,
            total_loss=self.smoothed_total_loss.GetMedianValue(),
            epoch=cur_epoch,
        )

        param_groups = optimizer.state_dict()['param_groups']
        stats['lr'] = OrderedDict(
            ('group%d_lr' % idx, group['lr'])
            for idx, group in enumerate(param_groups)
        )

        # Each loss is wrapped in its own one-entry dict so that
        # tb_log_stats recurses into it like any other nested group.
        for name, smoothed in self.smoothed_losses.items():
            stats[name] = OrderedDict([(name, smoothed.GetMedianValue())])

        stats['val_err'] = (
            OrderedDict() if val_err is None else OrderedDict(val_err)
        )
        return stats