Esempio n. 1
0
class CallBackLogging(object):
    """Training callback that periodically reports speed and loss.

    Each call feeds the current loss into a running metric. Once every
    ``frequent`` updates the metric value and the measured speed are written
    to a ``SummaryWriter`` and, on rank 0 only, to the ``logging`` module.
    """

    def __init__(self, rank, size, prefix_dir):
        """Initialise counters, the loss metric and the summary writer.

        ``batch_size`` and ``frequent`` are read from the module-level
        ``config`` object rather than passed in.

        Args:
            rank: Rank of this worker; only rank 0 emits text log lines.
            size: Factor the per-rank speed is multiplied by to get the total
                speed (presumably the number of workers - confirm).
            prefix_dir: Base directory; summaries go to
                ``<prefix_dir>/log_tensorboard/<month>_<day>_<hour>``.
        """
        self.batch_size = config.batch_size
        self.frequent = config.frequent
        self.rank = rank
        self.size = size
        self.prefix_dir = prefix_dir

        # Timing state: ``init`` stays False until the first call has
        # recorded a start time in ``tic``.
        self.init = False
        self.tic = 0
        self.last_count = 0
        self.loss_metric = MetricNdarray()

        # The log directory is named after the local start time
        # (month_day_hour).
        now = time.localtime()
        run_tag = "%s_%s_%s" % (str(now.tm_mon), str(now.tm_mday), str(now.tm_hour))
        log_dir = os.path.join(self.prefix_dir, "log_tensorboard", run_tag)
        self.summary_writer = SummaryWriter(logdir=log_dir, verbose=False)

    def __call__(self, param):
        """Record the loss of one update and report every ``frequent`` updates.

        Args:
            param: Object exposing ``num_update``, ``num_epoch`` and ``loss``
                (only ``loss[0]`` is used).
        """
        step = param.num_update

        # An update counter that went backwards restarts the timing window.
        if step < self.last_count:
            self.init = False
        self.last_count = step

        self.loss_metric.update(param.loss[0])

        # First call of a window: only start the clock.
        if not self.init:
            self.init = True
            self.tic = time.time()
            return

        # Nothing to report between reporting points.
        if step % self.frequent != 0:
            return

        # NOTE(review): presumably waits for queued asynchronous work so the
        # elapsed time below is meaningful - confirm against the `nd` module.
        nd.waitall()
        try:
            speed = self.frequent * self.batch_size / (time.time() - self.tic)
            speed_total = speed * self.size
        except ZeroDivisionError:
            # No measurable time elapsed since the last report.
            speed = speed_total = float('inf')

        # Loss summary; the metric starts over after each report.
        loss_scalar = self.loss_metric.get()
        self.summary_writer.add_scalar(
            tag="loss", value=loss_scalar, global_step=param.num_update)
        loss_str_format = "[%d][%s]:%.2f " % (param.num_epoch, "loss", loss_scalar)
        self.loss_metric.reset()

        # Speed summary.
        self.summary_writer.add_scalar(
            tag="speed", value=speed, global_step=param.num_update)
        self.summary_writer.flush()

        if self.rank == 0:
            logging.info(
                "Iter:%d Rank:%.2f it/sec Total:%.2f it/sec %s",
                param.num_update, speed, speed_total, loss_str_format)

        # Open the next timing window.
        self.tic = time.time()
Esempio n. 2
0
class LogCallBack(object):
    """Training callback reporting speed and one averaged loss per head.

    Each call feeds the per-head losses into one running metric per head.
    Once every ``frequent`` updates the metric values and the measured speed
    are written to a ``SummaryWriter`` and, on rank 0 only, to the
    ``logging`` module.
    """

    def __init__(self, batch_size, head_name_list, rank, size, prefix_dir,
                 frequent):
        """Initialise counters, per-head loss metrics and the summary writer.

        Args:
            batch_size: Per-update batch size used in the speed computation.
            head_name_list: Names of the heads; one metric is created per
                name and the name is used in summary tags and log lines.
            rank: Rank of this worker; only rank 0 emits text log lines.
            size: Factor the per-rank speed is multiplied by to get the total
                speed (presumably the number of workers - confirm).
            prefix_dir: Base directory; summaries go to
                ``<prefix_dir>/log_tensorboard/<month>_<day>_<hour>``.
            frequent: Report every ``frequent`` updates.
        """
        self.batch_size = batch_size
        self.rank = rank
        self.size = size
        self.prefix_dir = prefix_dir
        self.frequent = frequent

        # Timing state: ``init`` stays False until the first call has
        # recorded a start time in ``tic``.
        self.init = False
        self.tic = 0
        self.last_count = 0

        self.head_name_list = head_name_list
        self.loss_metric_list = [MetricNdarray() for _ in head_name_list]

        # The log directory is named after the local start time
        # (month_day_hour).
        t = time.localtime()
        run_tag = "%s_%s_%s" % (t.tm_mon, t.tm_mday, t.tm_hour)
        self.summary_writer = SummaryWriter(
            logdir=os.path.join(self.prefix_dir, 'log_tensorboard', run_tag),
            verbose=False)

    def __call__(self, param):
        """Forward to :meth:`logging` so the instance is usable as a callback."""
        self.logging(param)

    def logging(self, param):
        """Record the losses of one update and report every ``frequent`` updates.

        Args:
            param: Object exposing ``num_update``, ``loss_list`` (indexed in
                the same order as ``head_name_list``) and ``num_epoch_list``
                (one epoch number per head).

        Raises:
            IndexError: If ``loss_list`` has fewer entries than there are
                heads (extra entries are ignored).
        """
        count = param.num_update

        # An update counter that went backwards restarts the timing window.
        if self.last_count > count:
            self.init = False
        self.last_count = count

        # Index into loss_list instead of zipping so a too-short list still
        # fails loudly rather than being silently truncated.
        loss_list = param.loss_list
        for idx, metric in enumerate(self.loss_metric_list):
            metric.update(loss_list[idx])

        # First call of a window: only start the clock.
        if not self.init:
            self.init = True
            self.tic = time.time()
            return

        # Nothing to report between reporting points.
        if count % self.frequent != 0:
            return

        # NOTE(review): presumably waits for queued asynchronous work so the
        # elapsed time below is meaningful - confirm against the `nd` module.
        nd.waitall()
        try:
            speed = self.frequent * self.batch_size / (time.time() - self.tic)
            speed_total = speed * self.size
        except ZeroDivisionError:
            # No measurable time elapsed since the last report.
            speed = float('inf')
            speed_total = float('inf')

        # One loss summary per head; each metric starts over after reporting.
        loss_parts = []
        heads = zip(self.head_name_list, self.loss_metric_list)
        for idx, (name, metric) in enumerate(heads):
            loss_scalar = metric.get()
            self.summary_writer.add_scalar(
                tag="%s_loss" % name,
                value=loss_scalar,
                global_step=param.num_update)
            loss_parts.append(
                "[%d][%s]:%.2f " % (param.num_epoch_list[idx], name, loss_scalar))
            metric.reset()
        loss_str_format = "".join(loss_parts)

        # Speed summary.
        self.summary_writer.add_scalar(tag="speed",
                                       value=speed,
                                       global_step=param.num_update)
        self.summary_writer.flush()

        # The bare name `logging` here is the module-level logging module,
        # not this method (method names are not in scope inside the body).
        if self.rank == 0:
            logging.info(
                "Iter:%d Rank:%.2f it/sec Total:%.2f it/sec %s",
                param.num_update, speed, speed_total, loss_str_format)

        # Open the next timing window.
        self.tic = time.time()