def log_epoch_stats(self, cur_epoch): """ Log the stats of the current epoch. Args: cur_epoch (int): the number of current epoch. """ eta_sec = self.iter_timer.seconds() * ( self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters) eta = str(datetime.timedelta(seconds=int(eta_sec))) stats = { "_type": "train_epoch", "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH), "dt": self.iter_timer.seconds(), "dt_data": self.data_timer.seconds(), "dt_net": self.net_timer.seconds(), "eta": eta, "lr": self.lr, "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()), "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()), } if not self._cfg.DATA.MULTI_LABEL: top1_err = self.num_top1_mis / self.num_samples top5_err = self.num_top5_mis / self.num_samples avg_loss = self.loss_total / self.num_samples stats["top1_err"] = top1_err stats["top5_err"] = top5_err stats["loss"] = avg_loss logging.log_json_stats(stats)
def log_epoch_stats(self, cur_epoch): """ Log the stats of the current epoch. Args: cur_epoch (int): the number of current epoch. """ stats = { "_type": "val_epoch", "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH), "time_diff": self.iter_timer.seconds(), "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()), "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()), } if self._cfg.DATA.MULTI_LABEL: stats["map"] = get_map( torch.cat(self.all_preds).cpu().numpy(), torch.cat(self.all_labels).cpu().numpy(), ) else: top1_err = self.num_top1_mis / self.num_samples top5_err = self.num_top5_mis / self.num_samples self.min_top1_err = min(self.min_top1_err, top1_err) self.min_top5_err = min(self.min_top5_err, top5_err) stats["top1_err"] = top1_err stats["top5_err"] = top5_err stats["min_top1_err"] = self.min_top1_err stats["min_top5_err"] = self.min_top5_err logging.log_json_stats(stats)
def log_iter_stats(self, cur_epoch, cur_iter): """ log the stats of the current iteration. Args: cur_epoch (int): the number of current epoch. cur_iter (int): the number of current iteration. """ if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0: return eta_sec = self.iter_timer.seconds() * ( self.MAX_EPOCH - (cur_epoch * self.epoch_iters + cur_iter + 1)) eta = str(datetime.timedelta(seconds=int(eta_sec))) stats = { "_type": "train_iter", "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH), "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters), "time_diff": self.iter_timer.seconds(), "eta": eta, "loss": self.loss.get_win_median(), "lr": self.lr, "gpu_mem": "{:.2f} GB".format(misc.gpu_mem_usage()), "RAM": "{:.2f}/{:.2f} GB".format(*misc.cpu_mem_usage()), } if not self._cfg.DATA.MULTI_LABEL: stats["top1_err"] = self.mb_top1_err.get_win_median() stats["top5_err"] = self.mb_top5_err.get_win_median() logging.log_json_stats(stats)
def log_epoch_stats(self, cur_epoch, preds=[], labels=[]): """ Log the stats of the current epoch. Args: cur_epoch (int): the number of current epoch. """ if self.num_samples <= 0: self.logger.warning( "TrainMeter log_epoch_stats numSample {}".format( self.num_samples)) return None eta_sec = self.iter_timer.seconds() * ( self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters) eta = str(datetime.timedelta(seconds=int(eta_sec))) stats = { "_type": "train_epoch", "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH), "time_diff": self.iter_timer.seconds(), "eta": eta, "lr": self.lr, "gpu_mem": "{:.2f} GB".format(misc.gpu_mem_usage()), "RAM": "{:.2f}/{:.2f} GB".format(*misc.cpu_mem_usage()), } if not self._cfg.DATA.MULTI_LABEL: top1_err = self.num_top1_mis / self.num_samples top5_err = self.num_top5_mis / self.num_samples avg_loss = self.loss_total / self.num_samples stats["top1_err"] = top1_err stats["top5_err"] = top5_err stats["loss"] = avg_loss if len(preds) > 0 and len(labels) > 0: calc_binary_stats(preds, labels, stats, self._cfg) self.logger.info(stats) return stats
def log_epoch_stats(self, cur_epoch): """ Log the stats of the current epoch. Args: cur_epoch (int): the number of current epoch. """ if self.mode in ["val", "test"]: self.finalize_metrics(log=False) stats = { "_type": "{}_epoch".format(self.mode), "cur_epoch": "{}".format(cur_epoch + 1), "mode": self.mode, "map": self.full_map, "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()), "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()), } logging.log_json_stats(stats)
def log_epoch_stats(self, cur_epoch): """ Log the stats of the current epoch. Args: cur_epoch (int): the number of current epoch. """ verb_top1_acc = self.num_verb_top1_cor / self.num_samples verb_top5_acc = self.num_verb_top5_cor / self.num_samples noun_top1_acc = self.num_noun_top1_cor / self.num_samples noun_top5_acc = self.num_noun_top5_cor / self.num_samples top1_acc = self.num_top1_cor / self.num_samples top5_acc = self.num_top5_cor / self.num_samples self.max_verb_top1_acc = max(self.max_verb_top1_acc, verb_top1_acc) self.max_verb_top5_acc = max(self.max_verb_top5_acc, verb_top5_acc) self.max_noun_top1_acc = max(self.max_noun_top1_acc, noun_top1_acc) self.max_noun_top5_acc = max(self.max_noun_top5_acc, noun_top5_acc) is_best_epoch = top1_acc > self.max_top1_acc self.max_top1_acc = max(self.max_top1_acc, top1_acc) self.max_top5_acc = max(self.max_top5_acc, top5_acc) stats = { "_type": "val_epoch", "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH), "time_diff": self.iter_timer.seconds(), "verb_top1_acc": verb_top1_acc, "verb_top5_acc": verb_top5_acc, "noun_top1_acc": noun_top1_acc, "noun_top5_acc": noun_top5_acc, "top1_acc": top1_acc, "top5_acc": top5_acc, "max_verb_top1_acc": self.max_verb_top1_acc, "max_verb_top5_acc": self.max_verb_top5_acc, "max_noun_top1_acc": self.max_noun_top1_acc, "max_noun_top5_acc": self.max_noun_top5_acc, "max_top1_acc": self.max_top1_acc, "max_top5_acc": self.max_top5_acc, "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()), "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()), } logging.log_json_stats(stats) return is_best_epoch, {"top1_acc": top1_acc, "verb_top1_acc": verb_top1_acc, "noun_top1_acc": noun_top1_acc}
def log_epoch_stats(self, cur_epoch): """ Log the stats of the current epoch. Args: cur_epoch (int): the number of current epoch. """ eta_sec = self.iter_timer.seconds() * ( self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters ) eta = str(datetime.timedelta(seconds=int(eta_sec))) verb_top1_acc = self.num_verb_top1_cor / self.num_samples verb_top5_acc = self.num_verb_top5_cor / self.num_samples noun_top1_acc = self.num_noun_top1_cor / self.num_samples noun_top5_acc = self.num_noun_top5_cor / self.num_samples top1_acc = self.num_top1_cor / self.num_samples top5_acc = self.num_top5_cor / self.num_samples avg_loss_verb = self.loss_verb_total / self.num_samples avg_loss_noun = self.loss_noun_total / self.num_samples avg_loss = self.loss_total / self.num_samples stats = { "_type": "train_epoch", "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH), "dt": self.iter_timer.seconds(), "dt_data": self.data_timer.seconds(), "dt_net": self.net_timer.seconds(), "eta": eta, "verb_top1_acc": verb_top1_acc, "verb_top5_acc": verb_top5_acc, "noun_top1_acc": noun_top1_acc, "noun_top5_acc": noun_top5_acc, "top1_acc": top1_acc, "top5_acc": top5_acc, "verb_loss": avg_loss_verb, "noun_loss": avg_loss_noun, "loss": avg_loss, "lr": self.lr, "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()), "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()), } logging.log_json_stats(stats)
def log_epoch_stats(self, cur_epoch, preds=[], labels=[]): """ Log the stats of the current epoch. Args: cur_epoch (int): the number of current epoch. """ if self.num_samples <= 0: self.logger.warning("ValMeter log_epoch_stats numSample {}".format( self.num_samples)) return None stats = { "_type": "val_epoch", "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH), "time_diff": self.iter_timer.seconds(), "gpu_mem": "{:.2f} GB".format(misc.gpu_mem_usage()), "RAM": "{:.2f}/{:.2f} GB".format(*misc.cpu_mem_usage()), } if self._cfg.DATA.MULTI_LABEL: stats["map"] = get_map( torch.cat(self.all_preds).cpu().numpy(), torch.cat(self.all_labels).cpu().numpy(), ) else: top1_err = self.num_top1_mis / self.num_samples top5_err = self.num_top5_mis / self.num_samples self.min_top1_err = min(self.min_top1_err, top1_err) self.min_top5_err = min(self.min_top5_err, top5_err) stats["top1_err"] = top1_err stats["top5_err"] = top5_err stats["min_top1_err"] = self.min_top1_err stats["min_top5_err"] = self.min_top5_err if len(preds) > 0 and len(labels) > 0: calc_binary_stats(preds, labels, stats, self._cfg) self.logger.info(stats) return stats
def benchmark_data_loading(cfg): """ Benchmark the speed of data loading in PySlowFast. Args: cfg (CfgNode): configs. Details can be found in slowfast/config/defaults.py """ # Set up environment. setup_environment() # Set random seed from configs. np.random.seed(cfg.RNG_SEED) torch.manual_seed(cfg.RNG_SEED) # Setup logging format. logging.setup_logging() # Print config. logger.info("Benchmark data loading with config:") logger.info(pprint.pformat(cfg)) timer = Timer() dataloader = loader.construct_loader(cfg, "train") logger.info("Initialize loader using {:.2f} seconds.".format( timer.seconds())) # Total batch size across different machines. batch_size = cfg.TRAIN.BATCH_SIZE * cfg.NUM_SHARDS log_period = cfg.BENCHMARK.LOG_PERIOD epoch_times = [] # Test for a few epochs. for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS): timer = Timer() timer_epoch = Timer() iter_times = [] for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)): if cur_iter > 0 and cur_iter % log_period == 0: iter_times.append(timer.seconds()) ram_usage, ram_total = misc.cpu_mem_usage() logger.info( "Epoch {}: {} iters ({} videos) in {:.2f} seconds. " "RAM Usage: {:.2f}/{:.2f} GB.".format( cur_epoch, log_period, log_period * batch_size, iter_times[-1], ram_usage, ram_total, )) timer.reset() epoch_times.append(timer_epoch.seconds()) ram_usage, ram_total = misc.cpu_mem_usage() logger.info( "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. " "RAM Usage: {:.2f}/{:.2f} GB.".format( cur_epoch, len(dataloader), len(dataloader) * batch_size, epoch_times[-1], ram_usage, ram_total, )) logger.info( "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} " "(avg/std) seconds.".format( cur_epoch, log_period, log_period * batch_size, np.mean(iter_times), np.std(iter_times), )) logger.info("On average every epoch ({} videos) takes {:.2f}/{:.2f} " "(avg/std) seconds.".format( len(dataloader) * batch_size, np.mean(epoch_times), np.std(epoch_times), ))