class IterationTimer(HookBase):
    """Hook that tracks per-iteration time and reports overall training speed.

    The first ``warmup_iter`` iterations are excluded from the reported
    statistics so that one-off startup costs do not skew the averages.
    """

    def __init__(self, warmup_iter=3):
        # Number of leading iterations whose timings are discarded.
        self._warmup_iter = warmup_iter
        # Measures a single iteration (reset in before_step).
        self._step_timer = Timer()
        # Wall-clock anchor for the total training time (hooks included).
        self._start_time = time.perf_counter()
        # Accumulates time spent inside steps only (hooks excluded).
        self._total_timer = Timer()

    def before_train(self):
        """Record the wall-clock start and arm the step-only timer."""
        self._start_time = time.perf_counter()
        self._total_timer.reset()
        self._total_timer.pause()

    def after_train(self):
        """Log overall training speed and how much time hooks consumed."""
        logger = logging.getLogger(__name__)
        elapsed = time.perf_counter() - self._start_time
        step_only = self._total_timer.seconds()
        spent_in_hooks = elapsed - step_only

        counted = (
            self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter
        )
        # Speed is only meaningful once at least one post-warmup iteration ran.
        if counted > 0 and step_only > 0:
            logger.info(
                "Overall training speed: {} iterations in {} ({:.4f} s / it)".format(
                    counted,
                    str(datetime.timedelta(seconds=int(step_only))),
                    step_only / counted,
                )
            )

        logger.info(
            "Total training time: {} ({} on hooks)".format(
                str(datetime.timedelta(seconds=int(elapsed))),
                str(datetime.timedelta(seconds=int(spent_in_hooks))),
            )
        )

    def before_step(self):
        """Start timing one iteration."""
        self._step_timer.reset()
        self._total_timer.resume()

    def after_step(self):
        """Record the iteration time, or restart the clocks while warming up."""
        done = self.trainer.iter - self.trainer.start_iter + 1
        if done >= self._warmup_iter:
            self.trainer.storage.put_scalars(time=self._step_timer.seconds())
        else:
            # Still warming up: discard everything measured so far.
            self._start_time = time.perf_counter()
            self._total_timer.reset()
            self._total_timer.pause()
def test_avg_second(self) -> None:
    """
    Check that avg_seconds() reports the mean duration of the pauses.
    """
    for expected in (0.1, 0.15):
        timer = Timer()
        # Accumulate ten sleep intervals of the same length.
        for _ in range(10):
            if timer.is_paused():
                timer.resume()
            time.sleep(expected)
            timer.pause()
        self.assertTrue(
            math.isclose(expected, timer.avg_seconds(), rel_tol=1e-1),
            msg="{}: {}".format(expected, timer.avg_seconds()),
        )
def test_timer(self) -> None:
    """
    Test basic timer functions (pause, resume, and reset).
    """
    timer = Timer()
    time.sleep(0.5)
    # Running: roughly 0.5s has accumulated.
    self.assertTrue(0.99 > timer.seconds() >= 0.5)
    timer.pause()
    time.sleep(0.5)
    # Paused: the extra sleep must not be counted.
    self.assertTrue(0.99 > timer.seconds() >= 0.5)
    timer.resume()
    time.sleep(0.5)
    # Resumed: roughly one full second of measured time now.
    self.assertTrue(1.49 > timer.seconds() >= 1.0)
    timer.reset()
    # Reset: the clock starts over from zero.
    self.assertTrue(0.49 > timer.seconds() >= 0)
def test_timer(self) -> None:
    """
    Test basic timer functions (pause, resume, and reset).
    """
    timer = Timer()

    def assert_between(lo, hi):
        # Measured time must fall inside [lo, hi).
        self.assertTrue(hi > timer.seconds() >= lo)

    time.sleep(0.5)
    assert_between(0.5, 0.99)

    timer.pause()
    time.sleep(0.5)
    # Paused: the extra sleep is not counted.
    assert_between(0.5, 0.99)

    timer.resume()
    time.sleep(0.5)
    # Resumed: roughly one full measured second now.
    assert_between(1.0, 1.49)

    timer.reset()
    # Reset: the clock starts over from zero.
    assert_between(0, 0.49)
class TestMeter(object):
    """
    Perform the multi-view ensemble for testing: each video with a unique
    index will be sampled with multiple clips, and the predictions of the
    clips will be aggregated to produce the final prediction for the video.
    The accuracy is calculated with the given ground truth labels.
    """

    def __init__(self, num_videos, num_clips, num_cls, overall_iters):
        """
        Construct tensors to store the predictions and labels. Expect to get
        num_clips predictions from each video, and calculate the metrics on
        num_videos videos.
        Args:
            num_videos (int): number of videos to test.
            num_clips (int): number of clips sampled from each video for
                aggregating the final prediction for the video.
            num_cls (int): number of classes for each prediction.
            overall_iters (int): overall iterations for testing.
        """
        self.iter_timer = Timer()
        self.num_clips = num_clips
        self.overall_iters = overall_iters
        # Initialize tensors.
        # Per-video summed clip predictions, ground-truth labels, and how
        # many clips have been accumulated for each video so far.
        self.video_preds = torch.zeros((num_videos, num_cls))
        self.video_labels = torch.zeros((num_videos)).long()
        self.clip_count = torch.zeros((num_videos)).long()
        # Reset metric.
        self.reset()

    def reset(self):
        """
        Reset the metric.
        """
        self.clip_count.zero_()
        self.video_preds.zero_()
        self.video_labels.zero_()

    def update_stats(self, preds, labels, clip_ids):
        """
        Collect the predictions from the current batch and perform on-the-fly
        summation as ensemble.
        Args:
            preds (tensor): predictions from the current batch. Dimension is
                N x C where N is the batch size and C is the channel size
                (num_cls).
            labels (tensor): the corresponding labels of the current batch.
                Dimension is N.
            clip_ids (tensor): clip indexes of the current batch, dimension
                is N.
        """
        for ind in range(preds.shape[0]):
            # Integer division maps a global clip index back to its video.
            vid_id = int(clip_ids[ind]) // self.num_clips
            self.video_labels[vid_id] = labels[ind]
            self.video_preds[vid_id] += preds[ind]
            self.clip_count[vid_id] += 1

    def log_iter_stats(self, cur_iter):
        """
        Log the stats.
        Args:
            cur_iter (int): the current iteration of testing.
        """
        # ETA extrapolates the last measured iteration time over the
        # remaining iterations.
        eta_sec = self.iter_timer.seconds() * (self.overall_iters - cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "split": "test_iter",
            "cur_iter": "{}".format(cur_iter + 1),
            "eta": eta,
            "time_diff": self.iter_timer.seconds(),
        }
        logging.log_json_stats(stats)

    def iter_tic(self):
        # Start timing one test iteration.
        self.iter_timer.reset()

    def iter_toc(self):
        # Stop timing the current test iteration.
        self.iter_timer.pause()

    def finalize_metrics(self, ks=(1, 5)):
        """
        Calculate and log the final ensembled metrics.
        ks (tuple): list of top-k values for topk_accuracies. For example,
            ks = (1, 5) corresponds to top-1 and top-5 accuracy.
        """
        # Warn when some videos received more or fewer clips than expected,
        # which would make the ensemble uneven.
        if not all(self.clip_count == self.num_clips):
            logger.warning(
                "clip count {} ~= num clips {}".format(
                    self.clip_count, self.num_clips
                )
            )
            logger.warning(self.clip_count)

        num_topks_correct = metrics.topks_correct(
            self.video_preds, self.video_labels, ks
        )
        # Convert correct counts to percentages over all videos.
        topks = [
            (x / self.video_preds.size(0)) * 100.0 for x in num_topks_correct
        ]
        assert len({len(ks), len(topks)}) == 1
        stats = {"split": "test_final"}
        for k, topk in zip(ks, topks):
            stats["top{}_acc".format(k)] = "{:.{prec}f}".format(topk, prec=2)
        logging.log_json_stats(stats)
class EPICValMeter(object):
    """
    Measures validation stats for the EPIC-Kitchens task: verb, noun, and
    combined (action) accuracies are tracked per minibatch and aggregated
    over the full validation set.
    """

    def __init__(self, summary_writer, max_iter, cfg):
        """
        Args:
            summary_writer (SummaryWriter): writer used to mirror the stats
                to tensorboard.
            max_iter (int): the max number of iteration of the current epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.max_iter = max_iter
        self.iter_timer = Timer()
        # Current minibatch accuracies (smoothed over a window).
        self.mb_top1_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_verb_top1_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_verb_top5_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_noun_top1_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_noun_top5_acc = ScalarMeter(cfg.LOG_PERIOD)
        # Max accuracies (over the full val set).
        self.max_top1_acc = 0.0
        self.max_top5_acc = 0.0
        self.max_verb_top1_acc = 0.0
        self.max_verb_top5_acc = 0.0
        self.max_noun_top1_acc = 0.0
        self.max_noun_top5_acc = 0.0
        # Number of correctly classified examples.
        self.num_top1_cor = 0
        self.num_top5_cor = 0
        self.num_verb_top1_cor = 0
        self.num_verb_top5_cor = 0
        self.num_noun_top1_cor = 0
        self.num_noun_top5_cor = 0
        self.num_samples = 0
        self.tb_writer: SummaryWriter = summary_writer

    def reset(self):
        """
        Reset the Meter.
        """
        # NOTE: the max_* accuracies deliberately survive reset() so they
        # track the best values across the whole run, not just one epoch.
        self.iter_timer.reset()
        self.mb_top1_acc.reset()
        self.mb_top5_acc.reset()
        self.mb_verb_top1_acc.reset()
        self.mb_verb_top5_acc.reset()
        self.mb_noun_top1_acc.reset()
        self.mb_noun_top5_acc.reset()
        self.num_top1_cor = 0
        self.num_top5_cor = 0
        self.num_verb_top1_cor = 0
        self.num_verb_top5_cor = 0
        self.num_noun_top1_cor = 0
        self.num_noun_top5_cor = 0
        self.num_samples = 0

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()

    def update_stats(self, top1_acc, top5_acc, mb_size):
        """
        Update the current stats.
        Args:
            top1_acc (array-like of float): (verb, noun, action) top1
                accuracy rates.
            top5_acc (array-like of float): (verb, noun, action) top5
                accuracy rates.
            mb_size (int): mini batch size.
        """
        self.mb_verb_top1_acc.add_value(top1_acc[0])
        self.mb_verb_top5_acc.add_value(top5_acc[0])
        self.mb_noun_top1_acc.add_value(top1_acc[1])
        self.mb_noun_top5_acc.add_value(top5_acc[1])
        self.mb_top1_acc.add_value(top1_acc[2])
        self.mb_top5_acc.add_value(top5_acc[2])
        # Weight each rate by the minibatch size so the epoch-level average
        # is computed over samples, not over batches.
        self.num_verb_top1_cor += top1_acc[0] * mb_size
        self.num_verb_top5_cor += top5_acc[0] * mb_size
        self.num_noun_top1_cor += top1_acc[1] * mb_size
        self.num_noun_top5_cor += top5_acc[1] * mb_size
        self.num_top1_cor += top1_acc[2] * mb_size
        self.num_top5_cor += top5_acc[2] * mb_size
        self.num_samples += mb_size

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        log the stats of the current iteration.
        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        # Only log every LOG_PERIOD iterations.
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (self.max_iter - cur_iter - 1)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        mem_usage = misc.gpu_mem_usage()
        stats = {
            "_type": "val_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "verb_top1_acc": self.mb_verb_top1_acc.get_win_median(),
            "verb_top5_acc": self.mb_verb_top5_acc.get_win_median(),
            "noun_top1_acc": self.mb_noun_top1_acc.get_win_median(),
            "noun_top5_acc": self.mb_noun_top5_acc.get_win_median(),
            "top1_acc": self.mb_top1_acc.get_win_median(),
            "top5_acc": self.mb_top5_acc.get_win_median(),
            "mem": int(np.ceil(mem_usage)),
        }
        log_to_tensorboard(self.tb_writer, stats)
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch.
        Args:
            cur_epoch (int): the number of current epoch.
        Returns:
            bool: True if this epoch set a new best combined top-1 accuracy.
        """
        # Sample-weighted accuracies over the full validation set.
        verb_top1_acc = self.num_verb_top1_cor / self.num_samples
        verb_top5_acc = self.num_verb_top5_cor / self.num_samples
        noun_top1_acc = self.num_noun_top1_cor / self.num_samples
        noun_top5_acc = self.num_noun_top5_cor / self.num_samples
        top1_acc = self.num_top1_cor / self.num_samples
        top5_acc = self.num_top5_cor / self.num_samples
        self.max_verb_top1_acc = max(self.max_verb_top1_acc, verb_top1_acc)
        self.max_verb_top5_acc = max(self.max_verb_top5_acc, verb_top5_acc)
        self.max_noun_top1_acc = max(self.max_noun_top1_acc, noun_top1_acc)
        self.max_noun_top5_acc = max(self.max_noun_top5_acc, noun_top5_acc)
        # Compare against the pre-update max so the current epoch can
        # qualify as the best one.
        is_best_epoch = top1_acc > self.max_top1_acc
        self.max_top1_acc = max(self.max_top1_acc, top1_acc)
        self.max_top5_acc = max(self.max_top5_acc, top5_acc)
        mem_usage = misc.gpu_mem_usage()
        stats = {
            "_type": "val_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "verb_top1_acc": verb_top1_acc,
            "verb_top5_acc": verb_top5_acc,
            "noun_top1_acc": noun_top1_acc,
            "noun_top5_acc": noun_top5_acc,
            "top1_acc": top1_acc,
            "top5_acc": top5_acc,
            "max_verb_top1_acc": self.max_verb_top1_acc,
            "max_verb_top5_acc": self.max_verb_top5_acc,
            "max_noun_top1_acc": self.max_noun_top1_acc,
            "max_noun_top5_acc": self.max_noun_top5_acc,
            "max_top1_acc": self.max_top1_acc,
            "max_top5_acc": self.max_top5_acc,
            "mem": int(np.ceil(mem_usage)),
        }
        log_to_tensorboard(self.tb_writer, stats, False)
        logging.log_json_stats(stats)

        return is_best_epoch
class EPICTrainMeter(object):
    """
    Measure training stats for the EPIC-Kitchens task: verb, noun, and
    combined (action) losses and accuracies, tracked per minibatch and
    aggregated per epoch.
    """

    def __init__(self, summary_writer, epoch_iters, cfg):
        """
        Args:
            summary_writer (SummaryWriter): writer used to mirror the stats
                to tensorboard.
            epoch_iters (int): the overall number of iterations of one epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.epoch_iters = epoch_iters
        # Total number of training iterations over the whole run.
        self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        # Windowed losses (combined, verb, noun) and their running totals.
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_total = 0.0
        self.loss_verb = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_verb_total = 0.0
        self.loss_noun = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_noun_total = 0.0
        self.lr = None
        # Current minibatch accuracies (smoothed over a window).
        self.mb_top1_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_verb_top1_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_verb_top5_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_noun_top1_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_noun_top5_acc = ScalarMeter(cfg.LOG_PERIOD)
        # Number of correctly classified examples.
        self.num_top1_cor = 0
        self.num_top5_cor = 0
        self.num_verb_top1_cor = 0
        self.num_verb_top5_cor = 0
        self.num_noun_top1_cor = 0
        self.num_noun_top5_cor = 0
        self.num_samples = 0
        self.tb_writer: SummaryWriter = summary_writer

    def reset(self):
        """
        Reset the Meter.
        """
        self.loss.reset()
        self.loss_total = 0.0
        self.loss_verb.reset()
        self.loss_verb_total = 0.0
        self.loss_noun.reset()
        self.loss_noun_total = 0.0
        self.lr = None
        self.mb_top1_acc.reset()
        self.mb_top5_acc.reset()
        self.mb_verb_top1_acc.reset()
        self.mb_verb_top5_acc.reset()
        self.mb_noun_top1_acc.reset()
        self.mb_noun_top5_acc.reset()
        self.num_top1_cor = 0
        self.num_top5_cor = 0
        self.num_verb_top1_cor = 0
        self.num_verb_top5_cor = 0
        self.num_noun_top1_cor = 0
        self.num_noun_top5_cor = 0
        self.num_samples = 0

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()

    def update_stats(self, top1_acc, top5_acc, loss, lr, mb_size):
        """
        Update the current stats.
        Args:
            top1_acc (array-like of float): (verb, noun, action) top1
                accuracy rates.
            top5_acc (array-like of float): (verb, noun, action) top5
                accuracy rates.
            loss (array-like of float): (verb, noun, combined) loss values.
            lr (float): learning rate.
            mb_size (int): mini batch size.
        """
        # Current minibatch stats
        self.mb_verb_top1_acc.add_value(top1_acc[0])
        self.mb_verb_top5_acc.add_value(top5_acc[0])
        self.mb_noun_top1_acc.add_value(top1_acc[1])
        self.mb_noun_top5_acc.add_value(top5_acc[1])
        self.mb_top1_acc.add_value(top1_acc[2])
        self.mb_top5_acc.add_value(top5_acc[2])
        self.loss_verb.add_value(loss[0])
        self.loss_noun.add_value(loss[1])
        self.loss.add_value(loss[2])
        self.lr = lr
        # Aggregate stats: weight by minibatch size so epoch averages are
        # computed over samples, not over batches.
        self.num_verb_top1_cor += top1_acc[0] * mb_size
        self.num_verb_top5_cor += top5_acc[0] * mb_size
        self.num_noun_top1_cor += top1_acc[1] * mb_size
        self.num_noun_top5_cor += top5_acc[1] * mb_size
        self.num_top1_cor += top1_acc[2] * mb_size
        self.num_top5_cor += top5_acc[2] * mb_size
        self.loss_verb_total += loss[0] * mb_size
        self.loss_noun_total += loss[1] * mb_size
        self.loss_total += loss[2] * mb_size
        self.num_samples += mb_size

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        log the stats of the current iteration.
        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        # Only log every LOG_PERIOD iterations.
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        # ETA extrapolates the last iteration time over all remaining
        # iterations of the whole run.
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch * self.epoch_iters + cur_iter + 1))
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        mem_usage = misc.gpu_mem_usage()
        stats = {
            "_type": "train_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "verb_top1_acc": self.mb_verb_top1_acc.get_win_median(),
            "verb_top5_acc": self.mb_verb_top5_acc.get_win_median(),
            "noun_top1_acc": self.mb_noun_top1_acc.get_win_median(),
            "noun_top5_acc": self.mb_noun_top5_acc.get_win_median(),
            "top1_acc": self.mb_top1_acc.get_win_median(),
            "top5_acc": self.mb_top5_acc.get_win_median(),
            "verb_loss": self.loss_verb.get_win_median(),
            "noun_loss": self.loss_noun.get_win_median(),
            "loss": self.loss.get_win_median(),
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        log_to_tensorboard(self.tb_writer, stats)
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch.
        Args:
            cur_epoch (int): the number of current epoch.
        """
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        mem_usage = misc.gpu_mem_usage()
        # Sample-weighted averages over the whole epoch.
        verb_top1_acc = self.num_verb_top1_cor / self.num_samples
        verb_top5_acc = self.num_verb_top5_cor / self.num_samples
        noun_top1_acc = self.num_noun_top1_cor / self.num_samples
        noun_top5_acc = self.num_noun_top5_cor / self.num_samples
        top1_acc = self.num_top1_cor / self.num_samples
        top5_acc = self.num_top5_cor / self.num_samples
        avg_loss_verb = self.loss_verb_total / self.num_samples
        avg_loss_noun = self.loss_noun_total / self.num_samples
        avg_loss = self.loss_total / self.num_samples
        stats = {
            "_type": "train_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "verb_top1_acc": verb_top1_acc,
            "verb_top5_acc": verb_top5_acc,
            "noun_top1_acc": noun_top1_acc,
            "noun_top5_acc": noun_top5_acc,
            "top1_acc": top1_acc,
            "top5_acc": top5_acc,
            "verb_loss": avg_loss_verb,
            "noun_loss": avg_loss_noun,
            "loss": avg_loss,
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        log_to_tensorboard(self.tb_writer, stats, False)
        logging.log_json_stats(stats)
class TestMeter(object):
    """
    Perform the multi-view ensemble for testing: each video with a unique
    index will be sampled with multiple clips, and the predictions of the
    clips will be aggregated to produce the final prediction for the video.
    The accuracy is calculated with the given ground truth labels.
    """

    def __init__(self, num_videos, num_clips, num_cls, overall_iters, isDemo):
        """
        Construct tensors to store the predictions and labels. Expect to get
        num_clips predictions from each video, and calculate the metrics on
        num_videos videos.
        Args:
            num_videos (int): number of videos to test.
            num_clips (int): number of clips sampled from each video for
                aggregating the final prediction for the video.
            num_cls (int): number of classes for each prediction.
            overall_iters (int): overall iterations for testing.
            isDemo (bool): if True, finalize_metrics additionally exports
                per-video class probabilities to tmp/probability.npy.
        """
        self.iter_timer = Timer()
        self.num_clips = num_clips
        self.overall_iters = overall_iters
        # Initialize tensors.
        self.video_preds = torch.zeros((num_videos, num_cls))
        self.video_labels = torch.zeros((num_videos)).long()
        self.clip_count = torch.zeros((num_videos)).long()
        # Reset metric.
        self.reset()
        self.isDemo = isDemo

    def reset(self):
        """
        Reset the metric.
        """
        self.clip_count.zero_()
        self.video_preds.zero_()
        self.video_labels.zero_()

    def update_stats(self, preds, labels, clip_ids):
        """
        Collect the predictions from the current batch and perform on-the-fly
        summation as ensemble.
        Args:
            preds (tensor): predictions from the current batch. Dimension is
                N x C where N is the batch size and C is the channel size
                (num_cls).
            labels (tensor): the corresponding labels of the current batch.
                Dimension is N.
            clip_ids (tensor): clip indexes of the current batch, dimension
                is N.
        """
        #print(preds,labels)
        for ind in range(preds.shape[0]):
            # Integer division maps a global clip index back to its video.
            vid_id = int(clip_ids[ind]) // self.num_clips
            self.video_labels[vid_id] = labels[ind]
            self.video_preds[vid_id] += preds[ind]
            self.clip_count[vid_id] += 1

    def log_iter_stats(self, cur_iter):
        """
        Log the stats.
        Args:
            cur_iter (int): the current iteration of testing.
        """
        eta_sec = self.iter_timer.seconds() * (self.overall_iters - cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "split": "test_iter",
            "cur_iter": "{}".format(cur_iter + 1),
            #"eta": eta,
            #"time_diff": self.iter_timer.seconds(),
        }
        #logging.log_json_stats(stats)

    def iter_tic(self):
        # Start timing one test iteration.
        self.iter_timer.reset()

    def iter_toc(self):
        # Stop timing the current test iteration.
        self.iter_timer.pause()

    def finalize_metrics(self, ks=(1, 2)):
        """
        Calculate and log the final ensembled metrics.
        ks (tuple): list of top-k values for topk_accuracies. For example,
            ks = (1, 5) corresponds to top-1 and top-5 accuracy.
        """
        if self.isDemo:
            # Demo mode: dump the (rescaled) probabilities of one target
            # class to tmp/probability.npy for external consumption.
            preds_numpy = self.video_preds.clone()
            normalize = np.array(softmax(preds_numpy.cpu().numpy()))
            # NOTE(review): 21 is presumably the index of a "jogging" class
            # in this label set — confirm against the dataset's class map.
            jogging_label = 21
            # Per-video probabilities sorted high-to-low.
            sort_p = []
            for p in normalize:
                sort_p.append(sorted(p, reverse=True))
            # Transposed so that the first index selects a class.
            propability = np.transpose(
                np.array(softmax(preds_numpy.cpu().numpy())))
            for i, v in enumerate(propability[jogging_label]):
                top1_v = sort_p[i][0]
                top2_v = sort_p[i][1]
                # If the target class is among the two strongest predictions,
                # renormalize it against the top-2 probability mass.
                if v == top1_v or v == top2_v:
                    propability[jogging_label][
                        i] = propability[jogging_label][i] / (top1_v + top2_v)
            cwd = os.getcwd()
            tmp_dir = os.path.join(cwd, "tmp")
            if not os.path.exists(tmp_dir):
                os.mkdir(tmp_dir)
            out_dir = os.path.join(tmp_dir, "probability.npy")
            np.save(out_dir, propability[jogging_label])

        # Warn when some videos received more or fewer clips than expected.
        if not all(self.clip_count == self.num_clips):
            logger.warning("clip count {} ~= num clips {}".format(
                self.clip_count, self.num_clips))
            logger.warning(self.clip_count)

        num_topks_correct = metrics.topks_correct(self.video_preds,
                                                  self.video_labels, ks)
        # Convert correct counts to percentages over all videos.
        topks = [(x / self.video_preds.size(0)) * 100.0
                 for x in num_topks_correct]
        #binary = [
        #    (x / self.video_preds.size(0)) * 100.0 for x in binary_correct
        #]
        assert len({len(ks), len(topks)}) == 1
        stats = {"split": "test_final"}
        for k, topk in zip(ks, topks):
            stats["top{}_acc".format(k)] = "{:.{prec}f}".format(topk, prec=2)
class TrainMeter(object):
    """
    Measure training stats: loss, learning rate, top-1/top-5 errors, and a
    breakdown of iteration time into data-loading and network time.
    """

    def __init__(self, epoch_iters, cfg):
        """
        Args:
            epoch_iters (int): the overall number of iterations of one epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.epoch_iters = epoch_iters
        # Total number of training iterations over the whole run.
        self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
        # iter = data loading + network; the two sub-timers split it.
        self.iter_timer = Timer()
        self.data_timer = Timer()
        self.net_timer = Timer()
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_total = 0.0
        self.lr = None
        # Current minibatch errors (smoothed over a window).
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Number of misclassified examples.
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0
        self.output_dir = cfg.OUTPUT_DIR

    def reset(self):
        """
        Reset the Meter.
        """
        self.loss.reset()
        self.loss_total = 0.0
        self.lr = None
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()
        self.data_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()
        self.net_timer.pause()

    def data_toc(self):
        # Data loading finished: stop the data clock and start the net clock.
        self.data_timer.pause()
        self.net_timer.reset()

    def update_stats(self, top1_err, top5_err, loss, lr, mb_size):
        """
        Update the current stats.
        Args:
            top1_err (float): top1 error rate.
            top5_err (float): top5 error rate.
            loss (float): loss value.
            lr (float): learning rate.
            mb_size (int): mini batch size.
        """
        self.loss.add_value(loss)
        self.lr = lr
        # Weight by minibatch size so epoch averages are per-sample.
        self.loss_total += loss * mb_size
        self.num_samples += mb_size

        # Error rates are undefined for multi-label training.
        if not self._cfg.DATA.MULTI_LABEL:
            # Current minibatch stats
            self.mb_top1_err.add_value(top1_err)
            self.mb_top5_err.add_value(top5_err)
            # Aggregate stats
            self.num_top1_mis += top1_err * mb_size
            self.num_top5_mis += top5_err * mb_size

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        log the stats of the current iteration.
        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        # Only log every LOG_PERIOD iterations.
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        # ETA extrapolates the last iteration time over all remaining
        # iterations of the whole run.
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch * self.epoch_iters + cur_iter + 1))
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "_type": "train_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "dt": self.iter_timer.seconds(),
            "dt_data": self.data_timer.seconds(),
            "dt_net": self.net_timer.seconds(),
            "eta": eta,
            "loss": self.loss.get_win_median(),
            "lr": self.lr,
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
        }
        if not self._cfg.DATA.MULTI_LABEL:
            stats["top1_err"] = self.mb_top1_err.get_win_median()
            stats["top5_err"] = self.mb_top5_err.get_win_median()
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch.
        Args:
            cur_epoch (int): the number of current epoch.
        """
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "_type": "train_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "dt": self.iter_timer.seconds(),
            "dt_data": self.data_timer.seconds(),
            "dt_net": self.net_timer.seconds(),
            "eta": eta,
            "lr": self.lr,
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
            "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()),
        }
        if not self._cfg.DATA.MULTI_LABEL:
            # Sample-weighted averages over the whole epoch.
            top1_err = self.num_top1_mis / self.num_samples
            top5_err = self.num_top5_mis / self.num_samples
            avg_loss = self.loss_total / self.num_samples
            stats["top1_err"] = top1_err
            stats["top5_err"] = top5_err
            stats["loss"] = avg_loss
        logging.log_json_stats(stats)
class TestMeter(object):
    """
    Perform the multi-view ensemble for testing: each video with a unique
    index will be sampled with multiple clips, and the predictions of the
    clips will be aggregated to produce the final prediction for the video.
    The accuracy is calculated with the given ground truth labels.
    """

    def __init__(
        self,
        num_videos,
        num_clips,
        num_cls,
        overall_iters,
        multi_label=False,
        ensemble_method="sum",
    ):
        """
        Construct tensors to store the predictions and labels. Expect to get
        num_clips predictions from each video, and calculate the metrics on
        num_videos videos.
        Args:
            num_videos (int): number of videos to test.
            num_clips (int): number of clips sampled from each video for
                aggregating the final prediction for the video.
            num_cls (int): number of classes for each prediction.
            overall_iters (int): overall iterations for testing.
            multi_label (bool): if True, use map as the metric.
            ensemble_method (str): method to perform the ensemble, options
                include "sum", and "max".
        """
        self.iter_timer = Timer()
        self.data_timer = Timer()
        self.net_timer = Timer()
        self.num_clips = num_clips
        self.overall_iters = overall_iters
        self.multi_label = multi_label
        self.ensemble_method = ensemble_method
        # Initialize tensors.
        self.video_preds = torch.zeros((num_videos, num_cls))
        if multi_label:
            # Start from a very negative value so "max" ensembling and mAP
            # treat unseen entries as effectively -inf.
            self.video_preds -= 1e10

        # Multi-label targets are per-class; single-label are class indices.
        self.video_labels = (
            torch.zeros((num_videos, num_cls))
            if multi_label
            else torch.zeros((num_videos)).long()
        )
        self.clip_count = torch.zeros((num_videos)).long()
        self.topk_accs = []
        self.stats = {}

        # Reset metric.
        self.reset()

    def reset(self):
        """
        Reset the metric.
        """
        self.clip_count.zero_()
        self.video_preds.zero_()
        if self.multi_label:
            # Restore the -inf-like baseline after zeroing.
            self.video_preds -= 1e10
        self.video_labels.zero_()

    def update_stats(self, preds, labels, clip_ids):
        """
        Collect the predictions from the current batch and perform on-the-fly
        summation as ensemble.
        Args:
            preds (tensor): predictions from the current batch. Dimension is
                N x C where N is the batch size and C is the channel size
                (num_cls).
            labels (tensor): the corresponding labels of the current batch.
                Dimension is N.
            clip_ids (tensor): clip indexes of the current batch, dimension
                is N.
        """
        for ind in range(preds.shape[0]):
            # Integer division maps a global clip index back to its video.
            vid_id = int(clip_ids[ind]) // self.num_clips
            # All clips of one video must agree on the label.
            if self.video_labels[vid_id].sum() > 0:
                assert torch.equal(
                    self.video_labels[vid_id].type(torch.FloatTensor),
                    labels[ind].type(torch.FloatTensor),
                )
            self.video_labels[vid_id] = labels[ind]
            if self.ensemble_method == "sum":
                self.video_preds[vid_id] += preds[ind]
            elif self.ensemble_method == "max":
                self.video_preds[vid_id] = torch.max(self.video_preds[vid_id],
                                                     preds[ind])
            else:
                raise NotImplementedError(
                    "Ensemble Method {} is not supported".format(
                        self.ensemble_method))
            self.clip_count[vid_id] += 1

    def log_iter_stats(self, cur_iter):
        """
        Log the stats.
        Args:
            cur_iter (int): the current iteration of testing.
        """
        # ETA extrapolates the last iteration time over the remaining
        # iterations.
        eta_sec = self.iter_timer.seconds() * (self.overall_iters - cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "split": "test_iter",
            "cur_iter": "{}".format(cur_iter + 1),
            "eta": eta,
            "time_diff": self.iter_timer.seconds(),
        }
        logging.log_json_stats(stats)

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()
        self.data_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()
        self.net_timer.pause()

    def data_toc(self):
        # Data loading finished: stop the data clock and start the net clock.
        self.data_timer.pause()
        self.net_timer.reset()

    def finalize_metrics(self, ks=(1, 5)):
        """
        Calculate and log the final ensembled metrics.
        ks (tuple): list of top-k values for topk_accuracies. For example,
            ks = (1, 5) corresponds to top-1 and top-5 accuracy.
        """
        # Warn when some videos received more or fewer clips than expected.
        if not all(self.clip_count == self.num_clips):
            logger.warning("clip count {} ~= num clips {}".format(
                ", ".join([
                    "{}: {}".format(i, k)
                    for i, k in enumerate(self.clip_count.tolist())
                ]),
                self.num_clips,
            ))

        self.stats = {"split": "test_final"}
        if self.multi_label:
            # Multi-label: report mean average precision.
            map = get_map(self.video_preds.cpu().numpy(),
                          self.video_labels.cpu().numpy())
            self.stats["map"] = map
        else:
            # Single-label: report top-k accuracies as percentages.
            num_topks_correct = metrics.topks_correct(self.video_preds,
                                                      self.video_labels, ks)
            topks = [(x / self.video_preds.size(0)) * 100.0
                     for x in num_topks_correct]
            assert len({len(ks), len(topks)}) == 1
            for k, topk in zip(ks, topks):
                self.stats["top{}_acc".format(k)] = "{:.{prec}f}".format(
                    topk, prec=2)
        logging.log_json_stats(self.stats)
class ValMeter(object):
    """
    Measures validation stats.

    Scalar stats are registered dynamically through ``update_stats`` keyword
    arguments; each distinct keyword gets its own windowed ScalarMeter.
    """

    def __init__(self, max_iter, cfg):
        """
        Args:
            max_iter (int): the max number of iteration of the current epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.max_iter = max_iter
        self.iter_timer = Timer()
        self.num_samples = 0
        # Maps stat name -> ScalarMeter, populated lazily in update_stats.
        self.stats = {}

    def reset(self):
        """
        Reset the Meter.
        """
        self.iter_timer.reset()
        self.num_samples = 0
        # Keep the registered meters but clear their histories.
        for meter in self.stats.values():
            meter.reset()

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()

    def update_stats(self, mb_size, **kwargs):
        """
        Update the current stats.
        Args:
            mb_size (int): mini batch size.
            **kwargs: scalar stats to record (e.g. ``top1_err=...``); a
                ScalarMeter is created on first use of each name.
        """
        self.num_samples += mb_size
        for name, value in kwargs.items():
            if name not in self.stats:
                self.stats[name] = ScalarMeter(self._cfg.LOG_PERIOD)
            self.stats[name].add_value(value)

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        log the stats of the current iteration.
        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        # Only log every LOG_PERIOD iterations.
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (self.max_iter - cur_iter - 1)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        mem_usage = misc.gpu_mem_usage()
        stats = {
            "_type": "val_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_diff": self.iter_timer.seconds(),
            "time_left": eta,
            "mem": int(np.ceil(mem_usage)),
        }
        # Report the windowed median of every dynamically-registered stat.
        for k, v in self.stats.items():
            stats[k] = v.get_win_median()
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch.
        Args:
            cur_epoch (int): the number of current epoch.
        """
        mem_usage = misc.gpu_mem_usage()
        stats = {
            "_type": "val_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "mem": int(np.ceil(mem_usage)),
        }
        # Report the running average of every dynamically-registered stat.
        for k, v in self.stats.items():
            stats[k] = v.get_global_avg()
        logging.log_json_stats(stats)
class TrainMeter(object):
    """Tracks and logs arbitrary scalar statistics over a training epoch."""

    def __init__(self, epoch_iters, cfg):
        """
        Args:
            epoch_iters (int): the overall number of iterations of one epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.epoch_iters = epoch_iters
        # Total number of training iterations across the whole run.
        self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        self.lr = None
        self.num_samples = 0
        # Maps stat name -> ScalarMeter, created lazily in update_stats().
        self.stats = {}

    def reset(self):
        """Clear the learning rate, the sample count, and every tracked stat."""
        self.lr = None
        self.num_samples = 0
        for meter in self.stats.values():
            meter.reset()

    def iter_tic(self):
        """Start timing the current iteration."""
        self.iter_timer.reset()

    def iter_toc(self):
        """Stop timing the current iteration."""
        self.iter_timer.pause()

    def update_stats(self, lr, mb_size, **kwargs):
        """
        Record one mini-batch worth of stats.

        Args:
            lr (float): learning rate.
            mb_size (int): mini batch size.
            **kwargs: scalar metrics to track; a ScalarMeter is created
                lazily for each previously unseen key.
        """
        for name, value in kwargs.items():
            meter = self.stats.get(name)
            if meter is None:
                meter = ScalarMeter(self._cfg.LOG_PERIOD)
                self.stats[name] = meter
            meter.add_value(value)
        self.lr = lr
        self.num_samples += mb_size

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log windowed stats every LOG_PERIOD iterations.

        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        iters_left = self.MAX_EPOCH - (cur_epoch * self.epoch_iters + cur_iter + 1)
        eta = str(datetime.timedelta(
            seconds=int(self.iter_timer.seconds() * iters_left)))
        stats = {
            "_type": "train_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "time_diff": self.iter_timer.seconds(),
            "time_left": eta,
            "lr": self.lr,
            "mem": int(np.ceil(misc.gpu_mem_usage())),
        }
        for name, meter in self.stats.items():
            stats[name] = meter.get_win_median()
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log stats aggregated (global average) over the whole epoch.

        Args:
            cur_epoch (int): the number of current epoch.
        """
        iters_left = self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters
        eta = str(datetime.timedelta(
            seconds=int(self.iter_timer.seconds() * iters_left)))
        stats = {
            "_type": "train_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "time_left": eta,
            "lr": self.lr,
            "mem": int(np.ceil(misc.gpu_mem_usage())),
        }
        for name, meter in self.stats.items():
            stats[name] = meter.get_global_avg()
        logging.log_json_stats(stats)
def do_detect_upload(rtmpurl: str, analysisType='1|2|3'):
    """
    Grab one frame from a stream, run detection, draw and upload the result.

    Args:
        rtmpurl (str): stream (or file) URL readable by cv2.VideoCapture.
        analysisType (str): '|'-separated codes selecting which detection
            groups to keep ('1', '2', '3' — see mapping below).

    Returns:
        tuple: (predicts, tempfilename) on success; (None, None) when no
        frame could be read or an error occurred.
    """
    cap = None  # defined before try so the except path can release it safely
    try:
        cap = cv2.VideoCapture(rtmpurl)
        logger.debug(f'start det {rtmpurl} ...')
        ret, frame_read = cap.read()
        if not ret:
            logger.debug("ret is not...")
            cap.release()
            return None, None

        tt = Timer()
        predicts, kitchen_img_resized = yoyo.darkdetect(frame_read)

        # Drop overlapping duplicate boxes (keep higher score), then keep
        # only the classes requested by analysisType.
        class_groups = {
            '1': ['face-head', 'face-cap', 'mask-cap'],
            '2': ['face-head', 'mask-cap', 'mask-head'],
            '3': ['non-uniform', 'uniform'],
        }
        predicts_killed = kill_duplicate_by_score(predicts, xou_thres=.65)
        analysisType_predicts = []
        for xx in analysisType.split('|'):
            labels = class_groups.get(xx)
            if labels is None:
                continue
            for yy in predicts_killed:
                if yy[0] in labels:
                    analysisType_predicts.append(yy)
        predicts = analysisType_predicts

        tt.pause()
        logger.info(
            f'************** one shot detect time is {tt.seconds()} **************'
        )

        # Draw the surviving predictions onto the resized frame.
        vlz = myx_Visualizer(kitchen_img_resized,
                             {"thing_classes": thing_classes},
                             instance_mode=1)
        # Each prediction is (class_name, score, box).
        instance = Instances(
            yoyo.getsize(), **{
                "pred_boxes":
                np.array(list(map(convertBack, [x[2] for x in predicts]))),
                "scores": np.array([x[1] for x in predicts]),
                "pred_classes":
                np.array([thing_classes.index(x[0]) for x in predicts])
            })
        vout = vlz.draw_instance_predictions(predictions=instance)
        kitchen_img_resized = vout.get_image()
        logger.debug(instance)
        cap.release()
        logger.debug(f'end det {rtmpurl} ...')

        tempfilename = upload(kitchen_img_resized)
        if tempfilename:
            # Persist annotated frame, raw frame, and raw predictions.
            cv2.imwrite(f'imgslogs/{tempfilename}', kitchen_img_resized)
            cv2.imwrite(
                f'imgslogs/nondetected/{tempfilename[:-4]}_nondetected.jpg',
                frame_read)
            with PathManager.open(
                    f'imgslogs/predictions/{tempfilename[:-4]}.txt',
                    'w') as fid:
                fid.writelines(f'{predicts}')
        else:
            # Upload returned no name: keep local copies under random names.
            cv2.imwrite(f'imgslogs/{uuid.uuid4().hex}.jpg',
                        kitchen_img_resized)
            cv2.imwrite(f'imgslogs/{uuid.uuid4().hex}_nondetected.jpg',
                        frame_read)
        return predicts, tempfilename
    except Exception:
        # FIX: was a bare `except:` that also logged the `exec` builtin
        # (`logger.error(exec)`) instead of any error information.
        logger.error(traceback.format_exc())
        logger.debug(f'end det {rtmpurl} with errors...')
        # FIX: `cap` may be unbound if VideoCapture itself raised.
        if cap is not None:
            cap.release()
        return None, None
class TrainMeter(object):
    """
    Measures training stats: tracks a lazily discovered set of scalar
    values (losses/metrics) reported by the training loop.
    """

    def __init__(self, epoch_iters, cfg):
        """
        Args:
            epoch_iters (int): the overall number of iterations of one epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.epoch_iters = epoch_iters
        # Total number of training iterations across the whole run.
        self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        self.log_period = cfg.LOG_PERIOD
        # Created on the first update_stats() call so tracked keys follow
        # whatever the training loop reports.
        self.infos = None
        self.num_samples = 0

    def init(self, keys):
        """Create one ScalarMeter per tracked key."""
        self.infos = {}
        for key in keys:
            self.infos[key] = ScalarMeter(self.log_period)

    def reset(self):
        """
        Reset the Meter.
        """
        # FIX: guard against reset() before the first update_stats() call,
        # when self.infos is still None.
        if self.infos:
            for v in self.infos.values():
                v.reset()

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()

    def update_stats(self, info_dict):
        """
        Update the current stats.

        Args:
            info_dict (dict): maps stat name to a scalar tensor value.
        """
        if self.infos is None:
            self.init(info_dict.keys())
        # Reduce from all gpus before logging.
        if self._cfg.NUM_GPUS > 1:
            for k, v in info_dict.items():
                info_dict[k] = du.all_reduce([v])
        # Synchronize from gpu to cpu.
        info_dict = {k: v.item() for k, v in info_dict.items()}
        # Log value into scalar meter.
        for k, v in info_dict.items():
            self.infos[k].add_value(v)

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log the stats of the current iteration (every LOG_PERIOD iters).

        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch * self.epoch_iters + cur_iter + 1))
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        mem_usage = misc.gpu_mem_usage()
        stats = {
            "_type": "train_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "mem": int(np.ceil(mem_usage)),
        }
        # BUG FIX: the original iterated `self.infos` directly, which yields
        # keys only and crashes when unpacking `k, v`; .items() is required.
        infos = {k: v.get_win_avg() for k, v in (self.infos or {}).items()}
        stats.update(infos)
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch.

        Args:
            cur_epoch (int): the number of current epoch.
        """
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        mem_usage = misc.gpu_mem_usage()
        stats = {
            "_type": "train_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "mem": int(np.ceil(mem_usage)),
        }
        # BUG FIX: .items() required (see log_iter_stats).
        infos = {k: v.get_global_avg() for k, v in (self.infos or {}).items()}
        stats.update(infos)
        logging.log_json_stats(stats)
class TrainMeter(object):
    """
    Measure training stats for a GAN-style trainer with separate
    generator (G) and discriminator (D) losses.
    """

    def __init__(self, epoch_iters, cfg):
        """
        Args:
            epoch_iters (int): the overall number of iterations of one epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.epoch_iters = epoch_iters
        # Total number of training iterations across the whole run.
        self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        # Windowed meters for iteration-level logging.
        # BUG FIX: self.loss/self.lr/self.loss_total were referenced by
        # update_stats() but never initialized (AttributeError).
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_D = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_G = ScalarMeter(cfg.LOG_PERIOD)
        self.appe_loss = ScalarMeter(cfg.LOG_PERIOD)
        self.flow_loss = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_G_three_part = ScalarMeter(cfg.LOG_PERIOD)
        # Running totals used for epoch-level averages.
        self.loss_total = 0.0
        self.loss_D_total = 0.0
        self.loss_G_total = 0.0
        self.appe_loss_total = 0.0
        self.flow_loss_total = 0.0
        self.loss_G_three_part_total = 0.0
        self.lr = None
        self.lr_G = None
        self.lr_D = None
        # Number of misclassified examples.
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0
        self.num_samples_G = 0
        self.num_samples_D = 0

    def reset(self):
        """
        Reset the Meter.
        """
        self.loss.reset()
        self.loss_D.reset()
        self.loss_G.reset()
        self.appe_loss.reset()
        self.flow_loss.reset()
        self.loss_G_three_part.reset()
        self.loss_total = 0.0
        self.loss_D_total = 0.0
        self.loss_G_total = 0.0
        self.appe_loss_total = 0.0
        self.flow_loss_total = 0.0
        self.loss_G_three_part_total = 0.0
        self.lr = None
        self.lr_G = None
        self.lr_D = None
        self.num_samples = 0
        self.num_samples_D = 0
        self.num_samples_G = 0

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()

    def update_stats(self, top1_err, top5_err, loss, lr, mb_size):
        """
        Update the current stats.

        Args:
            top1_err (float): top1 error rate (currently unused).
            top5_err (float): top5 error rate (currently unused).
            loss (float): loss value.
            lr (float): learning rate.
            mb_size (int): mini batch size.
        """
        self.loss.add_value(loss)
        self.lr = lr
        self.loss_total += loss * mb_size
        self.num_samples += mb_size

    def update_stats_G(self, loss_G, appe_loss, flow_loss, loss_G_three_part,
                       lr, mb_size):
        """
        Update the current generator stats.

        Args:
            loss_G (float): generator loss value.
            appe_loss (float): appearance loss value.
            flow_loss (float): flow loss value.
            loss_G_three_part (float): combined three-part generator loss.
            lr (float): generator learning rate.
            mb_size (int): mini batch size.
        """
        self.loss_G.add_value(loss_G)
        self.appe_loss.add_value(appe_loss)
        self.flow_loss.add_value(flow_loss)
        self.loss_G_three_part.add_value(loss_G_three_part)
        # BUG FIX: lr was accepted but never stored (`self.lr_G = lr` was
        # commented out), so logs always reported lr_G as None.
        self.lr_G = lr
        self.loss_G_total += loss_G * mb_size
        # BUG FIX: the following three totals used `=` instead of `+=`,
        # discarding all but the last mini-batch; log_epoch_stats divides
        # them by the accumulated num_samples_G, so they must accumulate.
        self.appe_loss_total += appe_loss * mb_size
        self.flow_loss_total += flow_loss * mb_size
        self.loss_G_three_part_total += loss_G_three_part * mb_size
        self.num_samples_G += mb_size

    def update_stats_D(self, loss_D, lr, mb_size):
        """
        Update the current stats of D.

        Args:
            loss_D (float): discriminator loss value.
            lr (float): discriminator learning rate.
            mb_size (int): mini batch size.
        """
        self.loss_D.add_value(loss_D)
        self.lr_D = lr
        self.loss_D_total += loss_D * mb_size
        self.num_samples_D += mb_size

    def log_iter_stats(self, cur_epoch, cur_iter, mode):
        """
        Log the stats of the current iteration (every LOG_PERIOD iters).

        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
            mode (str): "D"/"Discriminator" or "G"/"Generator".
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch * self.epoch_iters + cur_iter + 1))
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        # Stats in D or G.
        if mode in ["D", "Discriminator"]:
            stats = {
                "_type": "train_iter",
                "epoch": "{}/{}".format(cur_epoch + 1,
                                        self._cfg.SOLVER.MAX_EPOCH),
                "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
                "time_diff": self.iter_timer.seconds(),
                "eta": eta,
                "loss_D": self.loss_D.get_win_median(),
                "lr_D": self.lr_D,
                "gpu_mem": "{:.2f} GB".format(misc.gpu_mem_usage()),
            }
        elif mode in ["G", "Generator"]:
            stats = {
                "_type": "train_iter",
                "epoch": "{}/{}".format(cur_epoch + 1,
                                        self._cfg.SOLVER.MAX_EPOCH),
                "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
                "time_diff": self.iter_timer.seconds(),
                "eta": eta,
                "loss_G": self.loss_G.get_win_median(),
                "appe_loss": self.appe_loss.get_win_median(),
                "flow_loss": self.flow_loss.get_win_median(),
                "three_part_loss_G": self.loss_G_three_part.get_win_median(),
                "lr_G": self.lr_G,
                "gpu_mem": "{:.2f} GB".format(misc.gpu_mem_usage()),
            }
        else:
            raise NotImplementedError("Does not support state")
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch (averages over all samples).

        Args:
            cur_epoch (int): the number of current epoch.
        """
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "_type": "train_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "lr_D": self.lr_D,
            "loss_D": self.loss_D_total / self.num_samples_D,
            "lr_G": self.lr_G,
            "loss_G": self.loss_G_total / self.num_samples_G,
            "appe_loss": self.appe_loss_total / self.num_samples_G,
            "flow_loss": self.flow_loss_total / self.num_samples_G,
            "total_G_loss": self.loss_G_three_part_total / self.num_samples_G,
            "gpu_mem": "{:.2f} GB".format(misc.gpu_mem_usage()),
            "RAM": "{:.2f}/{:.2f} GB".format(*misc.cpu_mem_usage()),
        }
        logging.log_json_stats(stats)
class ValMeter(object):
    """Tracks top-1/top-5 error over a validation epoch."""

    def __init__(self, max_iter, cfg):
        """
        Args:
            max_iter (int): the max number of iteration of the current epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.max_iter = max_iter
        self.iter_timer = Timer()
        # Windowed minibatch errors for iteration-level logging.
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Best (lowest) errors seen over the full val set so far.
        self.min_top1_err = 100.0
        self.min_top5_err = 100.0
        # Running per-epoch totals of misclassified examples.
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def reset(self):
        """Clear per-epoch state (the best-so-far errors are kept)."""
        self.iter_timer.reset()
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        """Start timing the current iteration."""
        self.iter_timer.reset()

    def iter_toc(self):
        """Stop timing the current iteration."""
        self.iter_timer.pause()

    def update_stats(self, top1_err, top5_err, mb_size):
        """
        Record one mini-batch of errors.

        Args:
            top1_err (float): top1 error rate.
            top5_err (float): top5 error rate.
            mb_size (int): mini batch size.
        """
        self.mb_top1_err.add_value(top1_err)
        self.mb_top5_err.add_value(top5_err)
        self.num_top1_mis += top1_err * mb_size
        self.num_top5_mis += top5_err * mb_size
        self.num_samples += mb_size

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log windowed stats every LOG_PERIOD iterations.

        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        remaining = self.max_iter - cur_iter - 1
        eta = str(datetime.timedelta(
            seconds=int(self.iter_timer.seconds() * remaining)))
        logging.log_json_stats({
            "_type": "val_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "top1_err": self.mb_top1_err.get_win_median(),
            "top5_err": self.mb_top5_err.get_win_median(),
            "mem": int(np.ceil(misc.gpu_mem_usage())),
        })

    def log_epoch_stats(self, cur_epoch):
        """
        Log full-epoch errors, refresh the best-so-far values, and return
        the minimum top-1 error.

        Args:
            cur_epoch (int): the number of current epoch.
        """
        top1_err = self.num_top1_mis / self.num_samples
        top5_err = self.num_top5_mis / self.num_samples
        self.min_top1_err = min(self.min_top1_err, top1_err)
        self.min_top5_err = min(self.min_top5_err, top5_err)
        logging.log_json_stats({
            "_type": "val_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "top1_err": top1_err,
            "top5_err": top5_err,
            "min_top1_err": self.min_top1_err,
            "min_top5_err": self.min_top5_err,
            "mem": int(np.ceil(misc.gpu_mem_usage())),
        })
        return self.min_top1_err
class IterationTimer(HookBase):
    """
    Times each trainer iteration (each run_step call) and logs a summary
    at the end of training.

    The measured span is between this hook's :meth:`before_step` and
    :meth:`after_step`. Since every hook's :meth:`before_step` is assumed
    to be cheap, put this hook first in the hook list to get accurate
    numbers.
    """

    def __init__(self, warmup_iter=3):
        """
        Args:
            warmup_iter (int): number of initial iterations excluded from
                the timing summary.
        """
        self._warmup_iter = warmup_iter
        self._step_timer = Timer()
        self._start_time = time.perf_counter()
        self._total_timer = Timer()

    def before_train(self):
        self._start_time = time.perf_counter()
        self._total_timer.reset()
        self._total_timer.pause()

    def after_train(self):
        logger = logging.getLogger(__name__)
        elapsed = time.perf_counter() - self._start_time
        step_only = self._total_timer.seconds()
        spent_in_hooks = elapsed - step_only

        timed_iters = (
            self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter
        )
        if timed_iters > 0 and step_only > 0:
            # Speed is meaningful only after warmup
            # NOTE this format is parsed by grep in some scripts
            logger.info(
                "Overall training speed: {} iterations in {} ({:.4f} s / it)".format(
                    timed_iters,
                    str(datetime.timedelta(seconds=int(step_only))),
                    step_only / timed_iters,
                )
            )

        logger.info(
            "Total training time: {} ({} on hooks)".format(
                str(datetime.timedelta(seconds=int(elapsed))),
                str(datetime.timedelta(seconds=int(spent_in_hooks))),
            )
        )

    def before_step(self):
        self._step_timer.reset()
        self._total_timer.resume()

    def after_step(self):
        # +1 because we're in after_step
        done = self.trainer.iter - self.trainer.start_iter + 1
        if done < self._warmup_iter:
            # Still warming up: restart all clocks so warmup is excluded.
            self._start_time = time.perf_counter()
            self._total_timer.reset()
            self._total_timer.pause()
        else:
            self.trainer.storage.put_scalars(time=self._step_timer.seconds())
class TrainMeter(object):
    """Tracks MSE / entropy / combined losses during training."""

    def __init__(self, epoch_iters, cfg):
        """
        :param epoch_iters: iters in one epoch
        :param cfg:
        """
        self._cfg = cfg
        self.epoch_iters = epoch_iters
        # Total number of training iterations across the whole run.
        self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        self.mse_loss = ScalarMeter(cfg.LOG_PERIOD)
        self.entropy_loss = ScalarMeter(cfg.LOG_PERIOD)
        self.combine_loss = ScalarMeter(cfg.LOG_PERIOD)
        self.lr = None

    def reset(self):
        """
        Reset the learning rate and all loss meters.
        :return:
        """
        self.lr = None
        self.mse_loss.reset()
        self.entropy_loss.reset()
        self.combine_loss.reset()

    def iter_start(self):
        """
        Start timing the current iteration.
        :return:
        """
        self.iter_timer.reset()

    def iter_stop(self):
        """
        Stop timing the current iteration.
        :return:
        """
        self.iter_timer.pause()

    def update_stats(self, mse_loss, entropy_loss, combine_loss, lr, mb_size):
        """Record one mini-batch of losses and the current learning rate."""
        self.mse_loss.add_value(mse_loss)
        self.entropy_loss.add_value(entropy_loss)
        self.combine_loss.add_value(combine_loss)
        self.lr = lr

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log windowed stats every LOG_PERIOD iterations.
        :param cur_epoch:
        :param cur_iter:
        :return:
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        iters_left = self.MAX_EPOCH - (cur_epoch * self.epoch_iters + cur_iter + 1)
        eta = str(datetime.timedelta(
            seconds=int(self.iter_timer.seconds() * iters_left)))
        logging.log_json_stats({
            "_type": "train_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "time": self.iter_timer.seconds(),
            "eta": eta,
            "mse_loss": self.mse_loss.get_win_median(),
            "entropy_loss": self.entropy_loss.get_win_median(),
            "combine_loss": self.combine_loss.get_win_median(),
            "lr": self.lr,
            "gpu": "{:.2f}GB".format(torch.cuda.max_memory_allocated() / 1024**3)
        })

    def log_epoch_stats(self, cur_epoch):
        """
        Log window-averaged losses at the end of an epoch.
        :param cur_epoch:
        :return:
        """
        logging.log_json_stats({
            "_type": "train_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "mse_loss": self.mse_loss.get_win_avg(),
            "entropy_loss": self.entropy_loss.get_win_avg(),
            "combine_loss": self.combine_loss.get_win_avg(),
            "gpu_mem": "{:.2f} GB".format(torch.cuda.max_memory_allocated() / 1024**3),
        })
class AVAMeter(object):
    """
    Measure the AVA train, val, and test stats.
    """

    def __init__(self, overall_iters, cfg, mode):
        """
        overall_iters (int): the overall number of iterations of one epoch.
        cfg (CfgNode): configs.
        mode (str): `train`, `val`, or `test` mode.
        """
        self.cfg = cfg
        self.lr = None
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.full_ava_test = cfg.AVA.FULL_TEST_ON_VAL
        self.mode = mode
        # iter_timer covers the full iteration; data_timer covers data
        # loading; net_timer covers the network forward pass.
        self.iter_timer = Timer()
        self.data_timer = Timer()
        self.net_timer = Timer()
        # Buffers filled in train mode (see update_stats).
        self.all_preds_train = []
        self.all_ori_boxes_train = []
        self.all_metadata_train = []
        # Buffers filled in val/test mode; consumed by finalize_metrics.
        self.all_preds = []
        self.all_ori_boxes = []
        self.all_metadata = []
        self.overall_iters = overall_iters
        self.categories, self.class_whitelist = read_labelmap(
            os.path.join(cfg.AVA.ANNOTATION_DIR, cfg.AVA.LABEL_MAP_FILE))
        gt_filename = os.path.join(cfg.AVA.ANNOTATION_DIR,
                                   cfg.AVA.GROUNDTRUTH_FILE)
        self.full_groundtruth = read_csv(gt_filename, self.class_whitelist)
        self.mini_groundtruth = get_ava_mini_groundtruth(self.full_groundtruth)
        _, self.video_idx_to_name = ava_helper.load_image_lists(
            cfg, mode == "train")
        self.output_dir = cfg.OUTPUT_DIR

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log the stats (only every LOG_PERIOD iterations).
        Args:
            cur_epoch (int): the current epoch.
            cur_iter (int): the current iteration.
        """
        if (cur_iter + 1) % self.cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (self.overall_iters - cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        # Each mode logs a slightly different set of fields: train adds
        # loss/lr, test has no epoch counter.
        if self.mode == "train":
            stats = {
                "_type": "{}_iter".format(self.mode),
                "cur_epoch": "{}".format(cur_epoch + 1),
                "cur_iter": "{}".format(cur_iter + 1),
                "eta": eta,
                "dt": self.iter_timer.seconds(),
                "dt_data": self.data_timer.seconds(),
                "dt_net": self.net_timer.seconds(),
                "mode": self.mode,
                "loss": self.loss.get_win_median(),
                "lr": self.lr,
            }
        elif self.mode == "val":
            stats = {
                "_type": "{}_iter".format(self.mode),
                "cur_epoch": "{}".format(cur_epoch + 1),
                "cur_iter": "{}".format(cur_iter + 1),
                "eta": eta,
                "dt": self.iter_timer.seconds(),
                "dt_data": self.data_timer.seconds(),
                "dt_net": self.net_timer.seconds(),
                "mode": self.mode,
            }
        elif self.mode == "test":
            stats = {
                "_type": "{}_iter".format(self.mode),
                "cur_iter": "{}".format(cur_iter + 1),
                "eta": eta,
                "dt": self.iter_timer.seconds(),
                "dt_data": self.data_timer.seconds(),
                "dt_net": self.net_timer.seconds(),
                "mode": self.mode,
            }
        else:
            raise NotImplementedError("Unknown mode: {}".format(self.mode))

        logging.log_json_stats(stats)

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()
        self.data_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()
        self.net_timer.pause()

    def data_toc(self):
        # Data loading finished: stop the data timer and start timing the
        # network forward pass.
        self.data_timer.pause()
        self.net_timer.reset()

    def reset(self):
        """
        Reset the Meter.
        """
        # NOTE(review): only the val/test buffers are cleared; the
        # all_*_train buffers keep growing across epochs — confirm
        # intentional.
        self.loss.reset()
        self.all_preds = []
        self.all_ori_boxes = []
        self.all_metadata = []

    def update_stats(self, preds, ori_boxes, metadata, loss=None, lr=None):
        """
        Update the current stats.
        Args:
            preds (tensor): prediction embedding.
            ori_boxes (tensor): original boxes (x1, y1, x2, y2).
            metadata (tensor): metadata of the AVA data.
            loss (float): loss value.
            lr (float): learning rate.
        """
        if self.mode in ["val", "test"]:
            self.all_preds.append(preds)
            self.all_ori_boxes.append(ori_boxes)
            self.all_metadata.append(metadata)
        if self.mode in ["train"]:
            self.all_preds_train.append(preds)
            self.all_ori_boxes_train.append(ori_boxes)
            self.all_metadata_train.append(metadata)
        if loss is not None:
            self.loss.add_value(loss)
        if lr is not None:
            self.lr = lr

    def finalize_metrics(self, log=True):
        """
        Calculate and log the final AVA metrics.

        Side effect: stores the computed mAP in ``self.full_map``.
        """
        all_preds = torch.cat(self.all_preds, dim=0)
        all_ori_boxes = torch.cat(self.all_ori_boxes, dim=0)
        all_metadata = torch.cat(self.all_metadata, dim=0)

        # Evaluate against the full ground truth for test (or val when
        # FULL_TEST_ON_VAL is set), otherwise against the mini subset.
        if self.mode == "test" or (self.full_ava_test and self.mode == "val"):
            groundtruth = self.full_groundtruth
        else:
            groundtruth = self.mini_groundtruth

        self.full_map = evaluate_ava(
            all_preds,
            all_ori_boxes,
            all_metadata.tolist(),
            self.class_whitelist,
            self.categories,
            groundtruth=groundtruth,
            video_idx_to_name=self.video_idx_to_name,
        )
        if log:
            stats = {"mode": self.mode, "map": self.full_map}
            logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch (val/test only; train mode
        logs nothing here).
        Args:
            cur_epoch (int): the number of current epoch.
        """
        if self.mode in ["val", "test"]:
            self.finalize_metrics(log=False)
            stats = {
                "_type": "{}_epoch".format(self.mode),
                "cur_epoch": "{}".format(cur_epoch + 1),
                "mode": self.mode,
                "map": self.full_map,
                "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
                "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()),
            }
            logging.log_json_stats(stats)
class EPICValMeter(object):
    """
    Measures validation stats for EPIC-Kitchens style verb/noun/action
    classification.
    """

    def __init__(self, max_iter, cfg):
        """
        Args:
            max_iter (int): the max number of iteration of the current epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.max_iter = max_iter
        self.iter_timer = Timer()
        self.data_timer = Timer()
        self.net_timer = Timer()
        # Current minibatch accuracies (smoothed over a window).
        self.mb_top1_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_verb_top1_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_verb_top5_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_noun_top1_acc = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_noun_top5_acc = ScalarMeter(cfg.LOG_PERIOD)
        # Max accuracies (over the full val set).
        self.max_top1_acc = 0.0
        self.max_top5_acc = 0.0
        self.max_verb_top1_acc = 0.0
        self.max_verb_top5_acc = 0.0
        self.max_noun_top1_acc = 0.0
        self.max_noun_top5_acc = 0.0
        # Number of correctly classified examples.
        self.num_top1_cor = 0
        self.num_top5_cor = 0
        self.num_verb_top1_cor = 0
        self.num_verb_top5_cor = 0
        self.num_noun_top1_cor = 0
        self.num_noun_top5_cor = 0
        self.num_samples = 0
        # Raw predictions/labels accumulated via update_predictions.
        self.all_verb_preds = []
        self.all_verb_labels = []
        self.all_noun_preds = []
        self.all_noun_labels = []
        self.output_dir = cfg.OUTPUT_DIR

    def reset(self):
        """
        Reset the Meter.
        """
        # The max_* best-so-far values are deliberately kept across epochs.
        self.iter_timer.reset()
        self.mb_top1_acc.reset()
        self.mb_top5_acc.reset()
        self.mb_verb_top1_acc.reset()
        self.mb_verb_top5_acc.reset()
        self.mb_noun_top1_acc.reset()
        self.mb_noun_top5_acc.reset()
        self.num_top1_cor = 0
        self.num_top5_cor = 0
        self.num_verb_top1_cor = 0
        self.num_verb_top5_cor = 0
        self.num_noun_top1_cor = 0
        self.num_noun_top5_cor = 0
        self.num_samples = 0
        self.all_verb_preds = []
        self.all_verb_labels = []
        self.all_noun_preds = []
        self.all_noun_labels = []

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()
        self.data_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()
        self.net_timer.pause()

    def data_toc(self):
        # Data loading finished: stop the data timer and start timing the
        # network forward pass.
        self.data_timer.pause()
        self.net_timer.reset()

    def update_stats(self, top1_acc, top5_acc, mb_size):
        """
        Update the current stats.
        Args:
            top1_acc: top1 accuracy rates, indexed as
                [0] verb, [1] noun, [2] combined action.
            top5_acc: top5 accuracy rates, same indexing.
            mb_size (int): mini batch size.
        """
        self.mb_verb_top1_acc.add_value(top1_acc[0])
        self.mb_verb_top5_acc.add_value(top5_acc[0])
        self.mb_noun_top1_acc.add_value(top1_acc[1])
        self.mb_noun_top5_acc.add_value(top5_acc[1])
        self.mb_top1_acc.add_value(top1_acc[2])
        self.mb_top5_acc.add_value(top5_acc[2])
        # Accumulate weighted counts for epoch-level averages.
        self.num_verb_top1_cor += top1_acc[0] * mb_size
        self.num_verb_top5_cor += top5_acc[0] * mb_size
        self.num_noun_top1_cor += top1_acc[1] * mb_size
        self.num_noun_top5_cor += top5_acc[1] * mb_size
        self.num_top1_cor += top1_acc[2] * mb_size
        self.num_top5_cor += top5_acc[2] * mb_size
        self.num_samples += mb_size

    def update_predictions(self, preds, labels):
        """
        Update predictions and labels.
        Args:
            preds (tensor): model output predictions, [0] verb, [1] noun.
            labels (tensor): labels, same indexing.
        """
        # TODO: merge update_prediction with update_stats.
        self.all_verb_preds.append(preds[0])
        self.all_verb_labels.append(labels[0])
        self.all_noun_preds.append(preds[1])
        self.all_noun_labels.append(labels[1])

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log the stats of the current iteration (every LOG_PERIOD iters).
        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (self.max_iter - cur_iter - 1)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "_type": "val_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "verb_top1_acc": self.mb_verb_top1_acc.get_win_median(),
            "verb_top5_acc": self.mb_verb_top5_acc.get_win_median(),
            "noun_top1_acc": self.mb_noun_top1_acc.get_win_median(),
            "noun_top5_acc": self.mb_noun_top5_acc.get_win_median(),
            "top1_acc": self.mb_top1_acc.get_win_median(),
            "top5_acc": self.mb_top5_acc.get_win_median(),
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
        }
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch, update the best-so-far values,
        and report whether this epoch set a new best action top-1 accuracy.
        Args:
            cur_epoch (int): the number of current epoch.
        Returns:
            tuple: (is_best_epoch, dict of top-1 accuracies).
        """
        verb_top1_acc = self.num_verb_top1_cor / self.num_samples
        verb_top5_acc = self.num_verb_top5_cor / self.num_samples
        noun_top1_acc = self.num_noun_top1_cor / self.num_samples
        noun_top5_acc = self.num_noun_top5_cor / self.num_samples
        top1_acc = self.num_top1_cor / self.num_samples
        top5_acc = self.num_top5_cor / self.num_samples
        self.max_verb_top1_acc = max(self.max_verb_top1_acc, verb_top1_acc)
        self.max_verb_top5_acc = max(self.max_verb_top5_acc, verb_top5_acc)
        self.max_noun_top1_acc = max(self.max_noun_top1_acc, noun_top1_acc)
        self.max_noun_top5_acc = max(self.max_noun_top5_acc, noun_top5_acc)
        # Must be computed before max_top1_acc is refreshed below.
        is_best_epoch = top1_acc > self.max_top1_acc
        self.max_top1_acc = max(self.max_top1_acc, top1_acc)
        self.max_top5_acc = max(self.max_top5_acc, top5_acc)
        stats = {
            "_type": "val_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "verb_top1_acc": verb_top1_acc,
            "verb_top5_acc": verb_top5_acc,
            "noun_top1_acc": noun_top1_acc,
            "noun_top5_acc": noun_top5_acc,
            "top1_acc": top1_acc,
            "top5_acc": top5_acc,
            "max_verb_top1_acc": self.max_verb_top1_acc,
            "max_verb_top5_acc": self.max_verb_top5_acc,
            "max_noun_top1_acc": self.max_noun_top1_acc,
            "max_noun_top5_acc": self.max_noun_top5_acc,
            "max_top1_acc": self.max_top1_acc,
            "max_top5_acc": self.max_top5_acc,
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
            "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()),
        }
        logging.log_json_stats(stats)

        return is_best_epoch, {"top1_acc": top1_acc,
                               "verb_top1_acc": verb_top1_acc,
                               "noun_top1_acc": noun_top1_acc}
class ValMeter(object):
    """
    Measures validation stats over one pass of the validation set.
    """

    def __init__(self, max_iter, cfg):
        """
        Args:
            max_iter (int): the max number of iteration of the current epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.max_iter = max_iter
        # Timers for the full iteration, the data-loading part, and the
        # network part respectively.
        self.iter_timer = Timer()
        self.data_timer = Timer()
        self.net_timer = Timer()
        # Minibatch error rates, smoothed over a logging window.
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Best (minimum) error rates seen over full passes of the val set.
        self.min_top1_err = 100.0
        self.min_top5_err = 100.0
        # Running counts of misclassified examples and of examples seen.
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0
        self.all_preds = []
        self.all_labels = []
        self.output_dir = cfg.OUTPUT_DIR

    def reset(self):
        """
        Clear all per-epoch state so the meter can be reused.
        """
        for meter in (self.iter_timer, self.mb_top1_err, self.mb_top5_err):
            meter.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0
        self.all_preds = []
        self.all_labels = []

    def iter_tic(self):
        """
        Mark the start of an iteration (data loading begins now).
        """
        self.iter_timer.reset()
        self.data_timer.reset()

    def iter_toc(self):
        """
        Mark the end of an iteration (network pass ends now).
        """
        self.iter_timer.pause()
        self.net_timer.pause()

    def data_toc(self):
        """
        Mark the end of data loading and the start of the network pass.
        """
        self.data_timer.pause()
        self.net_timer.reset()

    def update_stats(self, top1_err, top5_err, mb_size):
        """
        Accumulate error stats for one minibatch.

        Args:
            top1_err (float): top1 error rate.
            top5_err (float): top5 error rate.
            mb_size (int): mini batch size.
        """
        self.mb_top1_err.add_value(top1_err)
        self.mb_top5_err.add_value(top5_err)
        # Errors arrive as rates; convert back to counts before summing.
        self.num_top1_mis += top1_err * mb_size
        self.num_top5_mis += top5_err * mb_size
        self.num_samples += mb_size

    def update_predictions(self, preds, labels):
        """
        Store raw predictions and labels for full-set metrics.

        Args:
            preds (tensor): model output predictions.
            labels (tensor): labels.
        """
        # TODO: merge update_prediction with update_stats.
        self.all_preds.append(preds)
        self.all_labels.append(labels)

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log the stats of the current iteration, every LOG_PERIOD iterations.

        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        iters_left = self.max_iter - cur_iter - 1
        eta = str(
            datetime.timedelta(
                seconds=int(self.iter_timer.seconds() * iters_left)
            )
        )
        stats = {
            "_type": "val_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
        }
        # Top-k errors are only tracked for single-label tasks.
        if not self._cfg.DATA.MULTI_LABEL:
            stats["top1_err"] = self.mb_top1_err.get_win_median()
            stats["top5_err"] = self.mb_top5_err.get_win_median()
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch.

        Args:
            cur_epoch (int): the number of current epoch.
        """
        stats = {
            "_type": "val_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
            "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()),
        }
        if self._cfg.DATA.MULTI_LABEL:
            # Multi-label: report mean average precision over the full set.
            stats["map"] = get_map(
                torch.cat(self.all_preds).cpu().numpy(),
                torch.cat(self.all_labels).cpu().numpy(),
            )
        else:
            top1_err = self.num_top1_mis / self.num_samples
            top5_err = self.num_top5_mis / self.num_samples
            self.min_top1_err = min(self.min_top1_err, top1_err)
            self.min_top5_err = min(self.min_top5_err, top5_err)
            stats.update(
                top1_err=top1_err,
                top5_err=top5_err,
                min_top1_err=self.min_top1_err,
                min_top5_err=self.min_top5_err,
            )
        logging.log_json_stats(stats)
class RecMeter(object):
    """
    Measures recognition stats (HOI / action / object precision, recall, F1)
    for either a training or a validation epoch.
    """

    def __init__(self, epoch_iters, cfg, mode):
        """
        Args:
            epoch_iters (int): the overall number of iterations of one epoch.
            cfg (CfgNode): settings.
            mode (str): 'train' or 'val'; in 'train' mode, loss and learning
                rate are tracked and logs are tagged as training stats.
        """
        self._cfg = cfg
        self._mode = mode
        self.epoch_iters = epoch_iters
        # Total number of iterations over the whole training schedule.
        self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_total = 0.0
        self.lr = None
        # Current minibatch precision/recall/F1 (smoothed over a window).
        self.avg_pre = ScalarMeter(cfg.LOG_PERIOD)
        self.avg_rec = ScalarMeter(cfg.LOG_PERIOD)
        self.avg_f1 = ScalarMeter(cfg.LOG_PERIOD)
        self.num_samples = 0
        # Raw predictions and labels, accumulated for epoch-level metrics.
        self.all_label_preds = []
        self.all_act_preds = []
        self.all_obj_preds = []
        self.all_labels = []
        self.all_act_labels = []
        self.all_obj_labels = []
        # Running true-positive / false-positive / false-negative counts.
        self.tp = 0
        self.fp = 0
        self.fn = 0

    def reset(self):
        """
        Reset the Meter (all accumulated stats; the iteration timer is
        re-armed per iteration by iter_tic, so it is not reset here).
        """
        self.loss.reset()
        self.loss_total = 0.0
        self.lr = None
        self.avg_pre.reset()
        self.avg_rec.reset()
        self.avg_f1.reset()
        self.num_samples = 0
        self.all_label_preds = []
        self.all_act_preds = []
        self.all_obj_preds = []
        self.all_labels = []
        self.all_act_labels = []
        self.all_obj_labels = []
        self.tp = 0
        self.fp = 0
        self.fn = 0

    def iter_tic(self):
        """
        Start to record time.
        """
        self.iter_timer.reset()

    def iter_toc(self):
        """
        Stop to record time.
        """
        self.iter_timer.pause()

    def update_stats(self, tp, fp, fn, label_preds, act_preds, obj_preds,
                     labels, gt_act_labels, gt_obj_labels, loss, lr, mb_size):
        """
        Update the current stats with one minibatch of results.

        Args:
            tp (int): number of true positives in the minibatch.
            fp (int): number of false positives in the minibatch.
            fn (int): number of false negatives in the minibatch.
            label_preds (tensor): HOI label predictions.
            act_preds (tensor): action predictions.
            obj_preds (tensor): object predictions.
            labels (tensor): ground-truth HOI labels.
            gt_act_labels (tensor): ground-truth action labels.
            gt_obj_labels (tensor): ground-truth object labels.
            loss (float): loss value (only used in 'train' mode).
            lr (float): learning rate (only used in 'train' mode).
            mb_size (int): mini batch size.
        """
        # Current minibatch precision/recall/F1; with no true positives all
        # three are defined as 0 (also avoids 0/0 when fp == fn == 0).
        if tp == 0:
            avg_pre = 0
            avg_rec = 0
            avg_f1 = 0
        else:
            avg_pre = tp / (tp + fp)
            avg_rec = tp / (tp + fn)
            avg_f1 = (2.0 * tp) / (2.0 * tp + fn + fp)
        self.avg_pre.add_value(avg_pre)
        self.avg_rec.add_value(avg_rec)
        self.avg_f1.add_value(avg_f1)
        self.tp += tp
        self.fn += fn
        self.fp += fp
        self.num_samples += mb_size
        self.all_label_preds.append(label_preds)
        self.all_act_preds.append(act_preds)
        self.all_obj_preds.append(obj_preds)
        self.all_labels.append(labels)
        self.all_act_labels.append(gt_act_labels)
        self.all_obj_labels.append(gt_obj_labels)
        if self._mode == 'train':
            self.loss.add_value(loss)
            self.loss_total += loss * mb_size
            self.lr = lr

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log the stats of the current iteration, every LOG_PERIOD iterations.

        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch * self.epoch_iters + cur_iter + 1))
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "R": "[{}/{}|{}/{}]".format(cur_epoch + 1,
                                        self._cfg.SOLVER.MAX_EPOCH,
                                        cur_iter + 1, self.epoch_iters),
            "eta": eta,
            "pre": self.avg_pre.get_win_median(),
            "rec": self.avg_rec.get_win_median(),
            "f1": self.avg_f1.get_win_median(),
        }
        if self._mode == 'train':
            stats["id"] = "train_iter"
            stats["loss"] = self.loss.get_win_median()
            stats["lr"] = self.lr
        else:
            stats["id"] = "val_iter"
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch and dump them to a JSON file
        under OUTPUT_DIR/log.

        Args:
            cur_epoch (int): the number of current epoch.
        """
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        results = self.finalize_metrics()
        stats = {
            "R": "[{}/{}]".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "eta": eta,
        }
        if self._mode == 'train':
            stats["loss"] = self.loss_total / self.num_samples
            stats["lr"] = self.lr
            stats["id"] = "train"
        else:
            stats["id"] = "val"
        for key, value in results.items():
            # Drop the trailing `support` element of each
            # (precision, recall, f1, support) tuple before logging.
            stats[key] = value[:-1]
        logging.log_json_stats(stats)
        save_path = Path(self._cfg.OUTPUT_DIR) / 'log'
        # exist_ok avoids the exists()/mkdir() race between processes.
        save_path.mkdir(parents=True, exist_ok=True)
        with (save_path / '{}_{}.json'.format(self._mode, cur_epoch)).open('w') as f:
            json.dump(stats, f)

    def finalize_metrics(self):
        """
        Compute micro-averaged precision/recall/F1 over everything
        accumulated so far, for HOIs, actions and objects.

        Returns:
            dict: keys 'hois', 'actions', 'objects', each mapping to the
            (precision, recall, f1, support) tuple returned by sklearn.
        """
        results = {}
        all_preds = torch.cat(self.all_label_preds, dim=0).numpy()
        all_labels = torch.cat(self.all_labels, dim=0).numpy()
        all_act_preds = torch.cat(self.all_act_preds, dim=0).numpy()
        # Convert per-sample action index lists into multi-hot vectors:
        # shift by +1 (presumably so -1 padding lands in bin 0 — confirm),
        # scatter into len(action)+1 bins, drop bin 0, and collapse over
        # the list dimension.
        all_act_labels = (torch.cat(self.all_act_labels, dim=0) + 1)
        all_act_labels = (torch.sum(
            torch.zeros(all_act_labels.size(0), all_act_labels.size(1),
                        (len(Metadata.action) + 1)).scatter_(
                            -1, all_act_labels, 1)[:, :, 1:],
            dim=1) > 0).type(torch.float).numpy()
        all_obj_preds = torch.cat(self.all_obj_preds, dim=0)
        all_obj_preds = all_obj_preds.view(-1, all_obj_preds.size(-1)).numpy()
        all_obj_labels = torch.cat(self.all_obj_labels, dim=0)
        all_obj_labels = all_obj_labels.view(-1, all_obj_labels.size(-1)).numpy()
        results['hois'] = sk_metrics.precision_recall_fscore_support(
            all_labels, all_preds,
            labels=list(range(len(Metadata.hoi))), average='micro')
        results['actions'] = sk_metrics.precision_recall_fscore_support(
            all_act_labels, all_act_preds,
            labels=list(range(len(Metadata.action))), average='micro')
        results['objects'] = sk_metrics.precision_recall_fscore_support(
            all_obj_labels, all_obj_preds,
            labels=list(range(len(Metadata.object))), average='micro')
        return results