class IterationTimer(TrainHook):
    """Train hook that tracks per-iteration time and overall training speed.

    Uses two timers:
      * ``step_timer`` — measures each individual training step
        (reset in ``before_step``, read in ``after_step``).
      * ``total_timer`` — accumulates time spent inside steps only
        (resumed in ``before_step``, paused in ``after_step``), so hook
        overhead can be reported separately in ``after_train``.

    NOTE(review): relies on the project's ``Timer`` providing
    ``reset``/``resume``/``pause``/``seconds`` with stopwatch semantics —
    confirm against its implementation.
    """

    def __init__(self, max_iter, start_iter, warmup_iter, ignore_warmup_time):
        """
        Args:
            max_iter: total number of training iterations (stored; not read
                within this class as visible here).
            start_iter: iteration training starts from (used to compute the
                number of timed iterations in ``after_train``).
            warmup_iter: number of warmup iterations excluded from the
                reported average speed.
            ignore_warmup_time: if True, per-step times recorded during
                warmup are discarded and the timers are restarted once
                warmup ends.
        """
        self.warmup_iter = warmup_iter
        self.step_timer = Timer()
        self.start_iter = start_iter
        self.max_iter = max_iter
        self.ignore_warmup_time = ignore_warmup_time

    def before_train(self, **kwargs):
        # Wall-clock start of the whole run (hooks included).
        self.start_time = time.perf_counter()
        # total_timer only runs between before_step/after_step; start paused.
        self.total_timer = Timer()
        self.total_timer.pause()

    def after_train(self, storage, **kwargs):
        """Log overall speed (excluding warmup) and total/hook time."""
        iter = storage.iter
        total_time = time.perf_counter() - self.start_time
        # Time accumulated inside steps only; the rest is hook overhead.
        total_time_minus_hooks = self.total_timer.seconds()
        hook_time = total_time - total_time_minus_hooks
        num_iter = iter + 1 - self.start_iter - self.warmup_iter
        if num_iter > 0 and total_time_minus_hooks > 0:
            # Speed is meaningful only after warmup
            # NOTE this format is parsed by grep in some scripts
            logging_rank(
                "Overall training speed: {} iterations in {} ({:.4f} s / it)".
                format(
                    num_iter,
                    str(datetime.timedelta(
                        seconds=int(total_time_minus_hooks))),
                    total_time_minus_hooks / num_iter,
                ))
        logging_rank("Total training time: {} ({} on hooks)".format(
            str(datetime.timedelta(seconds=int(total_time))),
            str(datetime.timedelta(seconds=int(hook_time))),
        ))

    def before_step(self, **kwargs):
        # Fresh per-step measurement; resume the steps-only accumulator.
        self.step_timer.reset()
        self.total_timer.resume()

    def after_step(self, storage, **kwargs):
        # +1 because we're in after_step
        if self.ignore_warmup_time:
            # ignore warm up time cost
            if storage.iter >= self.warmup_iter:
                sec = self.step_timer.seconds()
                storage.put_scalars(time=sec)
            else:
                # Still warming up: discard elapsed time and restart the
                # run-level clocks so warmup doesn't skew the averages.
                self.start_time = time.perf_counter()
                self.total_timer.reset()
        else:
            sec = self.step_timer.seconds()
            storage.put_scalars(time=sec)
        # Stop accumulating until the next before_step.
        self.total_timer.pause()
class TrainingStats(object):
    """Track vital training statistics (losses, learning rate, timing, ETA).

    Smooths per-loss values with a median filter over a sliding window and
    periodically emits them via ``log_stats`` and, optionally, tensorboard.
    """

    def __init__(self, args, log_period=20, tensorboard_logger=None):
        """
        Args:
            args: parsed command-line arguments; forwarded to ``log_stats``.
            log_period (int): output logging period in SGD iterations.
            tensorboard_logger: optional SummaryWriter-like object; when set,
                stats are also written to tensorboard.
        """
        self.args = args
        self.log_period = log_period
        self.tblogger = tensorboard_logger
        # Keys that are not meaningful as tensorboard scalars.
        self.tb_ignored_keys = ['iter', 'eta', 'epoch', 'time']
        self.iter_timer = Timer()
        # Window size for smoothing tracked values (with median filtering)
        self.filter_size = 20

        def create_smoothed_value():
            return SmoothedValue(self.filter_size)

        self.smoothed_losses = defaultdict(create_smoothed_value)
        self.smoothed_total_loss = SmoothedValue(self.filter_size)

    def IterTic(self):
        """Start the per-iteration timer."""
        self.iter_timer.tic()

    def IterToc(self):
        """Stop the per-iteration timer and return the last elapsed time."""
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        """Reset the per-iteration timer (clears the running average)."""
        self.iter_timer.reset()

    def UpdateIterStats(self, loss):
        """Update tracked iteration statistics.

        Args:
            loss (dict): maps loss names to scalar values; must contain the
                key 'total_loss' with the aggregate loss.
        """
        for name, value in loss.items():
            # Track every component except the aggregate 'total_loss'.
            if name != 'total_loss':
                self.smoothed_losses[name].AddValue(float(value))
        # BUGFIX: accumulate the total exactly once per call (the flattened
        # original could add it once per component key).
        self.smoothed_total_loss.AddValue(float(loss['total_loss']))

    def LogIterStats(self, cur_iter, cur_epoch, optimizer, val_err=None):
        """Log the tracked statistics every `log_period` iterations.

        Args:
            val_err (dict | None): optional validation-error metrics; `None`
                is treated as empty. (Default changed from a shared mutable
                `{}` to `None` — behavior is identical for callers.)
        """
        if cur_iter % self.log_period == 0:
            stats = self.GetStats(cur_iter, cur_epoch, optimizer, val_err)
            log_stats(stats, self.args)
            if self.tblogger:
                self.tb_log_stats(stats, cur_iter)

    def tb_log_stats(self, stats, cur_iter):
        """Recursively log the tracked statistics to tensorboard."""
        for k, v in stats.items():
            if k in self.tb_ignored_keys:
                continue
            if isinstance(v, dict):
                # Nested dicts (e.g. 'lr', per-loss groups) are flattened by
                # recursion; leaf scalars are written directly.
                self.tb_log_stats(v, cur_iter)
            else:
                self.tblogger.add_scalar(k, v, cur_iter)

    def GetStats(self, cur_iter, cur_epoch, optimizer, val_err=None):
        """Assemble current statistics into an OrderedDict.

        Args:
            cur_iter (int): current iteration (1-indexed).
            cur_epoch: current epoch, stored verbatim.
            optimizer: torch-style optimizer; per-group learning rates are
                read from its ``state_dict()['param_groups']``.
            val_err (dict | None): optional validation-error metrics.

        Returns:
            OrderedDict with iter/time/eta/total_loss/epoch, a nested 'lr'
            dict (one entry per param group), one nested dict per smoothed
            loss, and a 'val_err' dict.
        """
        if val_err is None:
            val_err = {}
        # ETA extrapolated from the running average iteration time.
        eta_seconds = self.iter_timer.average_time * (
            cfg.TRAIN.MAX_ITER - cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        stats = OrderedDict(
            iter=cur_iter,  # 1-indexed
            time=self.iter_timer.average_time,
            eta=eta,
            total_loss=self.smoothed_total_loss.GetMedianValue(),
            epoch=cur_epoch,
        )
        # One learning-rate entry per optimizer param group.
        stats['lr'] = OrderedDict(
            ('group%d_lr' % i, group['lr'])
            for i, group in enumerate(optimizer.state_dict()['param_groups'])
        )
        for k, v in self.smoothed_losses.items():
            stats[k] = OrderedDict([(k, v.GetMedianValue())])
        stats['val_err'] = OrderedDict(val_err)
        return stats