def compute_time_train(model, loss_fun):
    """Computes precise model forward + backward time using dummy data."""
    # Use train mode
    model.train()
    # Generate a dummy mini-batch and copy data to GPU
    im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS)
    inputs = torch.rand(batch_size, 3, im_size, im_size).cuda(non_blocking=False)
    labels = torch.zeros(batch_size, dtype=torch.int64).cuda(non_blocking=False)
    # Cache BatchNorm2D running stats
    bns = [m for m in model.modules() if isinstance(m, torch.nn.BatchNorm2d)]
    bn_stats = [[bn.running_mean.clone(), bn.running_var.clone()] for bn in bns]
    # Compute precise forward backward pass time
    fw_timer, bw_timer = Timer(), Timer()
    total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
    for cur_iter in range(total_iter):
        # Reset the timers after the warmup phase
        if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
            fw_timer.reset()
            bw_timer.reset()
        # Forward
        fw_timer.tic()
        _, preds, _ = model(inputs)
        loss = loss_fun(preds, labels)
        torch.cuda.synchronize()
        fw_timer.toc()
        # Backward
        bw_timer.tic()
        loss.backward()
        torch.cuda.synchronize()
        bw_timer.toc()
    # Restore BatchNorm2D running stats
    for bn, (mean, var) in zip(bns, bn_stats):
        bn.running_mean, bn.running_var = mean, var
    return fw_timer.average_time, bw_timer.average_time
def compute_time_loader(data_loader):
    """Computes loader time."""
    timer = Timer()
    loader.shuffle(data_loader, 0)
    data_loader_iterator = iter(data_loader)
    total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
    total_iter = min(total_iter, len(data_loader))
    for cur_iter in range(total_iter):
        # Reset the timer after the warmup phase
        if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
            timer.reset()
        timer.tic()
        next(data_loader_iterator)
        timer.toc()
    return timer.average_time
def compute_time_eval(model):
    """Computes precise model forward test time using dummy data."""
    # Use eval mode
    model.eval()
    # Generate a dummy mini-batch and copy data to GPU
    im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS)
    inputs = torch.zeros(batch_size, 3, im_size, im_size).cuda(non_blocking=False)
    # Compute precise forward pass time
    timer = Timer()
    total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
    for cur_iter in range(total_iter):
        # Reset the timer after the warmup phase
        if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
            timer.reset()
        # Forward
        timer.tic()
        model(inputs)
        torch.cuda.synchronize()
        timer.toc()
    return timer.average_time
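# --------------------------------------------------------------------------- #
# Usage sketch (added, not part of the original file): one plausible driver
# that combines the three timing helpers above into a single precise-timing
# pass, in the style of pycls' benchmark utilities. The function name, the
# train_loader argument, and the "iter_times" log key are assumptions.
# --------------------------------------------------------------------------- #
def compute_time_full(model, loss_fun, train_loader):
    """Times model forward/backward and loader (illustrative sketch)."""
    logger.info("Computing model and loader timings...")
    # Compute timings using the helpers defined above
    test_fw_time = compute_time_eval(model)
    train_fw_time, train_bw_time = compute_time_train(model, loss_fun)
    train_fw_bw_time = train_fw_time + train_bw_time
    train_loader_time = compute_time_loader(train_loader)
    # Output iteration timings
    iter_times = {
        "test_fw_time": test_fw_time,
        "train_fw_time": train_fw_time,
        "train_bw_time": train_bw_time,
        "train_fw_bw_time": train_fw_bw_time,
        "train_loader_time": train_loader_time,
    }
    logger.info(logging.dump_log_data(iter_times, "iter_times"))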
class TrainMeter(object):
    """Measures training stats."""

    def __init__(self, epoch_iters):
        self.epoch_iters = epoch_iters
        self.max_iter = cfg.OPTIM.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        self.desc_loss = ScalarMeter(cfg.LOG_PERIOD)
        self.desc_loss_total = 0.0
        self.att_loss = ScalarMeter(cfg.LOG_PERIOD)
        self.att_loss_total = 0.0
        self.lr = None
        # Current minibatch errors (smoothed over a window)
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_att_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_att_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Number of misclassified examples
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_att_top1_mis = 0
        self.num_att_top5_mis = 0
        self.num_samples = 0

    def reset(self, timer=False):
        if timer:
            self.iter_timer.reset()
        self.desc_loss.reset()
        self.att_loss.reset()
        self.desc_loss_total = 0.0
        self.att_loss_total = 0.0
        self.lr = None
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.mb_att_top1_err.reset()
        self.mb_att_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_att_top1_mis = 0
        self.num_att_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        self.iter_timer.tic()

    def iter_toc(self):
        self.iter_timer.toc()

    def update_stats(
        self, desc_top1_err, desc_top5_err, att_top1_err, att_top5_err,
        desc_loss, att_loss, lr, mb_size
    ):
        # Current minibatch stats
        self.mb_top1_err.add_value(desc_top1_err)
        self.mb_top5_err.add_value(desc_top5_err)
        self.desc_loss.add_value(desc_loss)
        self.mb_att_top1_err.add_value(att_top1_err)
        self.mb_att_top5_err.add_value(att_top5_err)
        self.att_loss.add_value(att_loss)
        self.lr = lr
        # Aggregate stats
        self.num_top1_mis += desc_top1_err * mb_size
        self.num_top5_mis += desc_top5_err * mb_size
        self.num_att_top1_mis += att_top1_err * mb_size
        self.num_att_top5_mis += att_top5_err * mb_size
        self.desc_loss_total += desc_loss * mb_size
        self.att_loss_total += att_loss * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        cur_iter_total = cur_epoch * self.epoch_iters + cur_iter + 1
        eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
        mem_usage = gpu_mem_usage()
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "eta": time_string(eta_sec),
            "desc_top1_err": self.mb_top1_err.get_win_median(),
            "desc_top5_err": self.mb_top5_err.get_win_median(),
            "desc_loss": self.desc_loss.get_win_median(),
            "att_top1_err": self.mb_att_top1_err.get_win_median(),
            "att_top5_err": self.mb_att_top5_err.get_win_median(),
            "att_loss": self.att_loss.get_win_median(),
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_iter_stats(self, cur_epoch, cur_iter):
        if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
            return
        stats = self.get_iter_stats(cur_epoch, cur_iter)
        logger.info(logging.dump_log_data(stats, "train_iter"))

    def get_epoch_stats(self, cur_epoch):
        cur_iter_total = (cur_epoch + 1) * self.epoch_iters
        eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
        mem_usage = gpu_mem_usage()
        desc_top1_err = self.num_top1_mis / self.num_samples
        desc_top5_err = self.num_top5_mis / self.num_samples
        desc_avg_loss = self.desc_loss_total / self.num_samples
        att_top1_err = self.num_att_top1_mis / self.num_samples
        att_top5_err = self.num_att_top5_mis / self.num_samples
        att_avg_loss = self.att_loss_total / self.num_samples
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "time_avg": self.iter_timer.average_time,
            "eta": time_string(eta_sec),
            "desc_top1_err": desc_top1_err,
            "desc_top5_err": desc_top5_err,
            "desc_loss": desc_avg_loss,
            "att_top1_err": att_top1_err,
            "att_top5_err": att_top5_err,
            "att_loss": att_avg_loss,
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_epoch_stats(self, cur_epoch):
        stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(stats, "train_epoch"))
class TestMeter(object):
    """Measures testing stats (only desc)."""

    def __init__(self, max_iter):
        self.max_iter = max_iter
        self.iter_timer = Timer()
        # Current minibatch errors (smoothed over a window)
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Min errors (over the full test set)
        self.min_top1_err = 100.0
        self.min_top5_err = 100.0
        # Number of misclassified examples
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def reset(self, min_errs=False):
        if min_errs:
            self.min_top1_err = 100.0
            self.min_top5_err = 100.0
        self.iter_timer.reset()
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        self.iter_timer.tic()

    def iter_toc(self):
        self.iter_timer.toc()

    def update_stats(self, top1_err, top5_err, mb_size):
        self.mb_top1_err.add_value(top1_err)
        self.mb_top5_err.add_value(top5_err)
        self.num_top1_mis += top1_err * mb_size
        self.num_top5_mis += top5_err * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        mem_usage = gpu_mem_usage()
        iter_stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "top1_err": self.mb_top1_err.get_win_median(),
            "top5_err": self.mb_top5_err.get_win_median(),
            "mem": int(np.ceil(mem_usage)),
        }
        return iter_stats

    def log_iter_stats(self, cur_epoch, cur_iter):
        if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
            return
        stats = self.get_iter_stats(cur_epoch, cur_iter)
        logger.info(logging.dump_log_data(stats, "test_iter"))

    def get_epoch_stats(self, cur_epoch):
        top1_err = self.num_top1_mis / self.num_samples
        top5_err = self.num_top5_mis / self.num_samples
        self.min_top1_err = min(self.min_top1_err, top1_err)
        self.min_top5_err = min(self.min_top5_err, top5_err)
        mem_usage = gpu_mem_usage()
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "time_avg": self.iter_timer.average_time,
            "top1_err": top1_err,
            "top5_err": top5_err,
            "min_top1_err": self.min_top1_err,
            "min_top5_err": self.min_top5_err,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_epoch_stats(self, cur_epoch):
        stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(stats, "test_epoch"))
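# --------------------------------------------------------------------------- #
# Usage sketch (added, not part of the original file): how TestMeter is
# typically driven over the test set. The model output ordering and the
# topk_errors helper are the same assumptions as in the training sketch above.
# --------------------------------------------------------------------------- #
@torch.no_grad()
def test_epoch_sketch(test_loader, model, test_meter, cur_epoch):
    """Illustrative evaluation loop around TestMeter (sketch only)."""
    model.eval()
    test_meter.iter_tic()
    for cur_iter, (inputs, labels) in enumerate(test_loader):
        inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        # Forward: only the descriptor head is evaluated (TestMeter tracks desc only)
        _, preds, _ = model(inputs)
        top1_err, top5_err = topk_errors(preds, labels, [1, 5])
        test_meter.iter_toc()
        test_meter.update_stats(top1_err.item(), top5_err.item(), inputs.size(0))
        test_meter.log_iter_stats(cur_epoch, cur_iter)
        test_meter.iter_tic()
    test_meter.log_epoch_stats(cur_epoch)
    test_meter.reset()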