# Shared dependencies for all variants below. Import paths follow Detectron's
# original lib/ layout; individual forks may use the detectron.* package
# paths instead.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import datetime
import json
import numpy as np

from caffe2.python import utils as c2_py_utils
from caffe2.python import workspace

from core.config import cfg
from datasets import dummy_datasets
from utils.logging import log_json_stats
from utils.logging import SmoothedValue
from utils.timer import Timer
import utils.net as nu


# Variant 1: __init__ only. The smoothing window and logging period shrink as
# the GPU count grows, keeping the logging cadence roughly constant in
# wall-clock terms.
def __init__(self, model):
    # Window size for smoothing tracked values (with median filtering)
    self.WIN_SZ = int(20 / cfg.NUM_GPUS)
    # Output logging period in SGD iterations
    self.LOG_PERIOD = int(20 / cfg.NUM_GPUS)
    self.smoothed_losses_and_metrics = {
        key: SmoothedValue(self.WIN_SZ)
        for key in model.losses + model.metrics
    }
    self.losses_and_metrics = {
        key: 0
        for key in model.losses + model.metrics
    }
    self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
    self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
    self.iter_total_loss = np.nan
    self.iter_timer = Timer()
    self.model = model
# Variant 2: __init__ only. Fixed window/period, plus per-epoch loss
# bookkeeping (cur_epoch_total_loss, last_epoch_mean_loss, cur_epoch).
def __init__(self, model):
    # Window size for smoothing tracked values (with median filtering)
    self.WIN_SZ = 20
    # Output logging period in SGD iterations
    self.LOG_PERIOD = 20
    self.smoothed_losses_and_metrics = {
        key: SmoothedValue(self.WIN_SZ)
        for key in model.losses + model.metrics
    }
    self.losses_and_metrics = {
        key: 0
        for key in model.losses + model.metrics
    }
    self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
    self.cur_epoch_total_loss = []
    self.last_epoch_mean_loss = 0
    self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
    self.iter_total_loss = np.nan
    self.iter_timer = Timer()
    self.model = model
    self.cur_epoch = 0
# Variant 3: __init__ only. Accepts an optional TensorBoard logger and keeps
# a list of keys that should not be forwarded to it.
def __init__(self, model, tensorflow_board=None):
    # Window size for smoothing tracked values (with median filtering)
    self.WIN_SZ = 20
    # Output logging period in SGD iterations
    self.LOG_PERIOD = 20
    self.smoothed_losses_and_metrics = {
        key: SmoothedValue(self.WIN_SZ)
        for key in model.losses + model.metrics
    }
    self.losses_and_metrics = {
        key: 0
        for key in model.losses + model.metrics
    }
    self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
    self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
    self.iter_total_loss = np.nan
    self.iter_timer = Timer()
    self.model = model
    self.tblogger = tensorflow_board
    self.tb_ignored_keys = ['iter', 'eta', 'mb_qsize', 'mem', 'time']
# Variant 4: full class. Iteration counts are expressed in epochs via
# roi_data_loader.get_num_iter_per_epoch(), losses are summed across GPUs,
# and logged values use window averages.
class TrainingStats(object):
    """Track vital training statistics."""

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = int(20 / cfg.NUM_GPUS)
        # Output logging period in SGD iterations
        self.LOG_PERIOD = int(20 / cfg.NUM_GPUS)
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize()
        )

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        num_iter_per_epoch = self.model.roi_data_loader.get_num_iter_per_epoch()
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER * num_iter_per_epoch - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        num_iter_per_epoch = self.model.roi_data_loader.get_num_iter_per_epoch()
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER * num_iter_per_epoch - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetAverageValue(),
            eta=eta,
            mb_qsize=int(
                np.round(self.smoothed_mb_qsize.GetAverageValue())
            ),
            mem=int(np.ceil(mem_usage / 1024 / 1024))
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetAverageValue()
        return stats
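# Usage sketch (an assumption, not part of the source): how a Detectron-style
# train loop typically drives TrainingStats. `lr_policy` is a hypothetical
# stand-in for the LR schedule; workspace.RunNet is the Caffe2 call that
# executes one forward/backward/update pass. Note that variant 4 above scales
# MAX_ITER by iterations-per-epoch; this loop uses raw iterations, matching
# the later variants.
def run_training_loop(model, lr_policy):
    training_stats = TrainingStats(model)
    for cur_iter in range(cfg.SOLVER.MAX_ITER):
        lr = lr_policy(cur_iter)  # hypothetical: iteration -> learning rate
        training_stats.IterTic()
        workspace.RunNet(model.net.Proto().name)
        training_stats.IterToc()
        training_stats.UpdateIterStats()
        training_stats.LogIterStats(cur_iter, lr)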
# Variant 5: full class. Window and logging period scale with GPU count from
# a base of 1280 iterations, every tracked blob is averaged across GPUs, and
# an optional per-blob memory profiler (GetMem) can be toggled via self.mem.
class TrainingStats(object):
    """Track vital training statistics."""

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = int(1280 / cfg.NUM_GPUS)
        # Output logging period in SGD iterations
        self.LOG_PERIOD = int(1280 / cfg.NUM_GPUS)
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model
        # GetMem() profiling runs only while self.mem is a dict; the second
        # assignment leaves it disabled by default.
        self.mem = dict()
        self.mem = None

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize()
        )
        if self.mem is not None:
            self.GetMem()

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)
            if self.mem is not None:
                # Report tracked blobs ordered by peak size.
                mem_sorted = sorted(self.mem.items(), key=lambda d: d[1])
                print(mem_sorted)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetAverageValue(),
            eta=eta,
            mb_qsize=int(np.round(self.smoothed_mb_qsize.GetAverageValue())),
            mem=int(np.ceil(mem_usage / 1024 / 1024))
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetAverageValue()
        return stats

    def is_grad(self, b):
        name = str(b)
        return name.endswith("_grad")

    def is_shared(self, b):
        name = str(b)
        return name.endswith("_shared")

    def GetMem(self):
        """Record the peak element count of gradient and shared blobs."""
        for op in self.model.net._net.op:
            for b in list(op.output):
                # Only gradient and memory-shared outputs are tracked.
                if not (self.is_grad(b) or self.is_shared(b)):
                    continue
                name = str(b)
                blob = workspace.FetchBlob(name)
                if name not in self.mem:
                    self.mem[name] = 0
                self.mem[name] = max(self.mem[name], blob.size)
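# Sketch (assumption, not from the source): the profiling hook above is off
# by default because __init__ ends with `self.mem = None`. Re-enabling it is
# a post-construction toggle; UpdateIterStats() then records peak blob sizes
# and LogIterStats() prints them sorted by size.
def make_profiling_stats(model):
    training_stats = TrainingStats(model)
    training_stats.mem = dict()  # turn per-blob peak-size tracking back on
    return training_stats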
# Variant 6: full class. WIN_SZ and LOG_PERIOD of 1 mean every iteration is
# logged without smoothing; adds domain-adaptation diagnostics (DA fade-in
# weight and PADA class-weight statistics) to the logged stats.
class TrainingStats(object):
    """Track vital training statistics."""

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 1
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 1
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize()
        )

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            mb_qsize=int(np.round(self.smoothed_mb_qsize.GetMedianValue())),
            mem=int(np.ceil(mem_usage / 1024 / 1024)),
        )
        if cfg.TRAIN.DA_FADE_IN:
            stats['da_weight'] = self.model.da_fade_in.get_weight()
        if cfg.TRAIN.PADA:
            stats['avg_pada_weight'] = \
                self.model.class_weight_db.get_avg_pada_weight()
            stats['total_detects'] = \
                self.model.class_weight_db.total_sum_softmax.sum() / 2
            stats['KL_div'] = self.model.class_weight_db.get_KL_to_init()
            stats['accuracy_fg'] = self.model.class_weight_db.fg_acc.get()
            stats['acc_fg_weighted'] = \
                self.model.class_weight_db.weighted_fg_acc.get()

            target_dist = self.model.class_weight_db.get_dist()
            print('target_dist: {}'.format(list(target_dist)))
            class_weights = self.model.class_weight_db.class_weights
            print('class_weights: {}'.format(list(class_weights)))
            # list() keeps this working under Python 3, where dict.values()
            # is a view rather than a list.
            classes = np.array(
                list(dummy_datasets.get_coco_dataset().classes.values()),
                dtype=str
            )
            # Print the five largest entries, ordered first by target
            # distribution mass and then by class weight.
            for dist in [target_dist, class_weights]:
                order = np.argsort(dist)[::-1]
                o_target_dist = target_dist[order]
                o_classes = classes[order]
                cwo = class_weights[order]
                print("dist tops: ", end='')
                for prob, w, c in list(zip(o_target_dist, cwo, o_classes))[:5]:
                    print("{}:{:.3f} ({:.3f})".format(c, prob, w), end='; ')
                print()
            print()
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats
# Variant 7: full class. Logs to an optional TensorBoard logger in addition
# to the JSON stats, skipping bookkeeping keys listed in tb_ignored_keys.
class TrainingStats(object):
    """Track vital training statistics."""

    def __init__(self, model, tensorflow_board=None):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model
        self.tblogger = tensorflow_board
        self.tb_ignored_keys = ['iter', 'eta', 'mb_qsize', 'mem', 'time']

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize()
        )

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)
            if self.tblogger:
                self.tb_log_stats(stats, cur_iter)

    def tb_log_stats(self, stats, cur_iter):
        """Log the tracked statistics to tensorboard."""
        for k in stats:
            if k not in self.tb_ignored_keys:
                v = stats[k]
                if isinstance(v, dict):
                    # Recurse into nested stat dicts.
                    self.tb_log_stats(v, cur_iter)
                else:
                    self.tblogger.write_scalars({k: v}, cur_iter)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            mb_qsize=int(np.round(self.smoothed_mb_qsize.GetMedianValue())),
            mem=int(np.ceil(mem_usage / 1024 / 1024))
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats
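# Sketch (assumption): the tblogger above only needs to expose a
# write_scalars(dict, step) method. A thin adapter over tensorboardX's
# SummaryWriter (a real package, though not named by the source) satisfies
# that interface.
from tensorboardX import SummaryWriter

class TensorboardLogger(object):
    """Minimal adapter exposing the write_scalars() hook used above."""

    def __init__(self, log_dir):
        self._writer = SummaryWriter(log_dir=log_dir)

    def write_scalars(self, scalar_dict, cur_iter):
        # One add_scalar call per tracked key, tagged by blob name.
        for tag, value in scalar_dict.items():
            self._writer.add_scalar(tag, value, cur_iter)

# e.g. TrainingStats(model, tensorflow_board=TensorboardLogger('/tmp/tb'))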
# Variant 8: full class. Streams every tracked value to a SummaryWriter-style
# `writer` on each iteration, disables minibatch-queue tracking, and can dump
# the raw and smoothed loss series to JSON via SaveTrainingStates().
class TrainingStats(object):
    """Track vital training statistics."""

    def __init__(self, model, writer):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model
        self.writer = writer

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self, i_iter=0):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
            self.writer.add_scalar(k, self.losses_and_metrics[k], i_iter)
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        # self.smoothed_mb_qsize.AddValue(
        #     self.model.roi_data_loader._minibatch_queue.qsize()
        # )

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            # mb_qsize=int(
            #     np.round(self.smoothed_mb_qsize.GetMedianValue())
            # ),
            mem=int(np.ceil(mem_usage / 1024 / 1024))
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats

    def SaveTrainingStates(self, save_file):
        """Dump the raw and smoothed loss/metric series to JSON files."""
        save_state = {
            key: 0 for key in self.smoothed_losses_and_metrics.keys()
        }
        save_smooth_state = {
            key: 0 for key in self.smoothed_losses_and_metrics.keys()
        }
        for k, v in self.smoothed_losses_and_metrics.items():
            save_state[k] = v.series
            save_smooth_state[k] = v.smooth_series
        save_state['total_loss'] = self.smoothed_total_loss.series
        save_smooth_state['total_loss'] = self.smoothed_total_loss.smooth_series
        with open(save_file, 'w') as f:
            json.dump(save_state, f)
        # Save the smoothed series alongside the raw one.
        save_smooth_file = save_file.replace('.json', '_smooth.json')
        with open(save_smooth_file, 'w') as f:
            json.dump(save_smooth_state, f)
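# Sketch (assumption): a minimal SmoothedValue compatible with every call the
# variants above make (AddValue, GetMedianValue, GetAverageValue), plus the
# .series / .smooth_series attributes that variant 8's SaveTrainingStates
# serializes. Detectron ships its own implementation in utils/logging; this
# stand-in only mirrors that interface, and the smooth_series definition
# (running windowed median) is a guess at variant 8's field.
from collections import deque

import numpy as np

class SmoothedValue(object):
    """Track a series of values and report the windowed median/average."""

    def __init__(self, window_size):
        self.deque = deque(maxlen=window_size)  # last window_size raw values
        self.series = []                        # full raw history
        self.smooth_series = []                 # running windowed median
        self.total = 0.0
        self.count = 0

    def AddValue(self, value):
        self.deque.append(value)
        self.series.append(value)
        self.smooth_series.append(float(np.median(self.deque)))
        self.count += 1
        self.total += value

    def GetMedianValue(self):
        return float(np.median(self.deque))

    def GetAverageValue(self):
        return float(np.mean(self.deque))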