Example #1
class TrainingStats(object):
    """Track vital training statistics."""

    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = int(20 / cfg.NUM_GPUS)
        # Output logging period in SGD iterations
        self.LOG_PERIOD = int(20 / cfg.NUM_GPUS)
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize()
        )

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        num_iter_per_epoch = self.model.roi_data_loader.get_num_iter_per_epoch()
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER * num_iter_per_epoch - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        num_iter_per_epoch = self.model.roi_data_loader.get_num_iter_per_epoch()
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER * num_iter_per_epoch - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetAverageValue(),
            eta=eta,
            mb_qsize=int(
                np.round(self.smoothed_mb_qsize.GetAverageValue())
            ),
            mem=int(np.ceil(mem_usage / 1024 / 1024))
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetAverageValue()
        return stats
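All of these variants lean on Detectron/Caffe2 helpers (cfg, nu, c2_py_utils, Timer, log_json_stats) that are imported elsewhere in the project. The smoothing helper is small enough to sketch inline; below is a rough, self-contained stand-in modeled on detectron.utils.logging.SmoothedValue (fork-specific extras such as the series history used in the last example are assumptions):

from collections import deque

import numpy as np

class SmoothedValue(object):
    """Track a series of values and provide access to windowed
    (median/mean filtered) and global statistics."""

    def __init__(self, window_size):
        self.deque = deque(maxlen=window_size)  # last window_size raw values
        self.series = []                        # full history (fork-specific)
        self.total = 0.0
        self.count = 0

    def AddValue(self, value):
        self.deque.append(value)
        self.series.append(value)
        self.count += 1
        self.total += value

    def GetMedianValue(self):
        return np.median(self.deque)

    def GetAverageValue(self):
        return np.mean(self.deque)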
Example #2
class TrainingStats(object):
    """Track vital training statistics."""
    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = int(1280 / cfg.NUM_GPUS)
        # Output logging period in SGD iterations
        self.LOG_PERIOD = int(1280 / cfg.NUM_GPUS)
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

        # Per-blob memory tracking used by GetMem(); the second assignment
        # overwrites the dict with None, which disables the profiler (both
        # call sites below are guarded by `if self.mem is not None`).
        self.mem = dict()
        self.mem = None

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses]))
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize())

        if self.mem is not None:
            self.GetMem()

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0
                or cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

        if self.mem is not None:
            mem_sorted = sorted(self.mem.items(), key=lambda d: d[1])
            print(mem_sorted)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (cfg.SOLVER.MAX_ITER -
                                                      cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(iter=cur_iter,
                     lr=float(lr),
                     time=self.iter_timer.average_time,
                     loss=self.smoothed_total_loss.GetAverageValue(),
                     eta=eta,
                     mb_qsize=int(
                         np.round(self.smoothed_mb_qsize.GetAverageValue())),
                     mem=int(np.ceil(mem_usage / 1024 / 1024)))
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetAverageValue()
        return stats

    def is_grad(self, b):
        name = str(b)
        return name.endswith("_grad")

    def is_shared(self, b):
        name = str(b)
        return name.endswith("_shared")

    def GetMem(self):
        """Record the peak element count of gradient and shared blobs."""
        for op in self.model.net._net.op:
            for b in list(op.output):
                # Only gradient and shared (memory-reused) blobs are tracked.
                if not (self.is_grad(b) or self.is_shared(b)):
                    continue
                blob = workspace.FetchBlob(str(b))
                name = str(b)
                if name not in self.mem:
                    self.mem[name] = 0
                self.mem[name] = max(self.mem[name], blob.size)
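For context, TrainingStats is driven from Detectron's training loop roughly as follows (a simplified sketch after detectron/utils/train.py; the model object and the lr_policy helper are assumed to exist as in Detectron):

from caffe2.python import workspace

training_stats = TrainingStats(model)
for cur_iter in range(cfg.SOLVER.MAX_ITER):
    training_stats.IterTic()
    # Update the learning rate per the schedule, then run one SGD
    # iteration of the Caffe2 net.
    lr = model.UpdateWorkspaceLr(cur_iter, lr_policy.get_lr_at_iter(cur_iter))
    workspace.RunNet(model.net.Proto().name)
    training_stats.IterToc()
    training_stats.UpdateIterStats()
    training_stats.LogIterStats(cur_iter, lr)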
Example #3
class TrainingStats(object):
    """Track vital training statistics."""
    def __init__(self, model, tensorflow_board=None):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

        self.tblogger = tensorflow_board
        self.tb_ignored_keys = ['iter', 'eta', 'mb_qsize', 'mem', 'time']

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses]))
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize())

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0
                or cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

            if self.tblogger:
                self.tb_log_stats(stats, cur_iter)

    def tb_log_stats(self, stats, cur_iter):
        """Log the tracked statistics to tensorboard"""
        for k in stats:
            if k not in self.tb_ignored_keys:
                v = stats[k]
                if isinstance(v, dict):
                    self.tb_log_stats(v, cur_iter)
                else:
                    self.tblogger.write_scalars({k: v}, cur_iter)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (cfg.SOLVER.MAX_ITER -
                                                      cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(iter=cur_iter,
                     lr=float(lr),
                     time=self.iter_timer.average_time,
                     loss=self.smoothed_total_loss.GetMedianValue(),
                     eta=eta,
                     mb_qsize=int(
                         np.round(self.smoothed_mb_qsize.GetMedianValue())),
                     mem=int(np.ceil(mem_usage / 1024 / 1024)))
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats
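The tensorflow_board object passed to this variant only needs to expose write_scalars(scalar_dict, cur_iter). A minimal adapter over tensorboardX (an assumption; any SummaryWriter-style backend would do) could look like:

from tensorboardX import SummaryWriter

class TensorboardLogger(object):
    """Tiny logger exposing the write_scalars() hook used above."""

    def __init__(self, log_dir):
        self.writer = SummaryWriter(log_dir=log_dir)

    def write_scalars(self, scalar_dict, cur_iter):
        # One scalar curve per stats key, indexed by SGD iteration.
        for name, value in scalar_dict.items():
            self.writer.add_scalar(name, value, cur_iter)

    def close(self):
        self.writer.close()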
Example #4
class TrainingStats(object):
    """Track vital training statistics."""
    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 1
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 1
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses]))
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize())

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0
                or cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (cfg.SOLVER.MAX_ITER -
                                                      cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            mb_qsize=int(np.round(self.smoothed_mb_qsize.GetMedianValue())),
            mem=int(np.ceil(mem_usage / 1024 / 1024)),
        )
        if cfg.TRAIN.DA_FADE_IN:
            stats['da_weight'] = self.model.da_fade_in.get_weight()
        if cfg.TRAIN.PADA:
            stats['avg_pada_weight'] = \
                self.model.class_weight_db.get_avg_pada_weight()
            stats['total_detects'] = \
                self.model.class_weight_db.total_sum_softmax.sum() / 2
            stats['KL_div'] = self.model.class_weight_db.get_KL_to_init()
            stats['accuracy_fg'] = self.model.class_weight_db.fg_acc.get()
            stats['acc_fg_weighted'] = \
                self.model.class_weight_db.weighted_fg_acc.get()

            target_dist = self.model.class_weight_db.get_dist()
            print('target_dist: {}'.format(list(target_dist)))
            class_weights = self.model.class_weight_db.class_weights
            print('class_weights: {}'.format(list(class_weights)))

            classes = np.array(
                list(dummy_datasets.get_coco_dataset().classes.values()),
                dtype=str)  # list(...) keeps this correct under Python 3

            for dist in [target_dist, class_weights]:
                order = np.argsort(dist)[::-1]
                o_target_dist = target_dist[order]
                o_classes = classes[order]
                cwo = class_weights[order]
                print("dist tops: ", end='')
                for prob, w, c in list(zip(o_target_dist, cwo, o_classes))[:5]:
                    print("{}:{:.3f} ({:.3f})".format(c, prob, w), end=';  ')
                print()
            print()

        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        # for k,v in stats.items():
        #     print(k,v)

        return stats
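The "dist tops" printout above is a descending argsort applied consistently to three aligned arrays. As a self-contained toy illustration (made-up numbers, not real PADA statistics):

import numpy as np

target_dist = np.array([0.05, 0.40, 0.15, 0.30, 0.10])
class_weights = np.array([1.0, 0.2, 0.8, 0.4, 0.9])
classes = np.array(['plane', 'person', 'car', 'dog', 'cat'])

# Rank classes by probability, highest first, and report the aligned weight.
order = np.argsort(target_dist)[::-1]
for prob, w, c in zip(target_dist[order], class_weights[order], classes[order]):
    print("{}:{:.3f} ({:.3f})".format(c, prob, w), end=';  ')
print()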
Example #5
class TrainingStats(object):
    """Track vital training statistics."""

    def __init__(self, model, writer):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model
        self.writer = writer

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self, i_iter=0):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
            self.writer.add_scalar(k, self.losses_and_metrics[k], i_iter)
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses])
        )
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        # self.smoothed_mb_qsize.AddValue(
        #     self.model.roi_data_loader._minibatch_queue.qsize()
        # )

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0 or
                cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (
            cfg.SOLVER.MAX_ITER - cur_iter
        )
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(
            iter=cur_iter,
            lr=float(lr),
            time=self.iter_timer.average_time,
            loss=self.smoothed_total_loss.GetMedianValue(),
            eta=eta,
            # mb_qsize=int(
            #     np.round(self.smoothed_mb_qsize.GetMedianValue())
            # ),
            mem=int(np.ceil(mem_usage / 1024 / 1024))
        )
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats
    
    def SaveTrainingStates(self, save_file):
        save_state = {
            key: 0 for key in self.smoothed_losses_and_metrics.keys()
        }
        save_smooth_state = {
            key: 0 for key in self.smoothed_losses_and_metrics.keys()
        }
        for k, v in self.smoothed_losses_and_metrics.items():
            save_state[k] = v.series
            save_smooth_state[k] = v.smooth_series
        save_state['total_loss'] = self.smoothed_total_loss.series
        # Mirrors the loop above; the original stored the raw series here,
        # presumably a copy-paste slip.
        save_smooth_state['total_loss'] = self.smoothed_total_loss.smooth_series
#        print(save_state)
        with open(save_file, 'w') as f:
            json.dump(save_state, f)
        # save smooth state
        save_smooth_file = save_file.replace('.json', '_smooth.json')
        with open(save_smooth_file, 'w') as f:
            json.dump(save_smooth_state, f)
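The saved JSON maps each loss/metric name to its full value series, so the curves can be reloaded for offline plotting. A usage sketch ('training_stats.json' is an assumed file name):

import json

with open('training_stats.json') as f:
    series = json.load(f)
for name, values in series.items():
    print('{}: {} points, last value {}'.format(name, len(values), values[-1]))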