class CallBackLogging(object):
    """Batch-end callback that reports training speed and a single loss.

    Every ``frequent`` updates the accumulated loss and the measured
    per-rank speed are written to a ``SummaryWriter``; rank 0 also emits one
    ``logging.info`` line. Batch size and reporting interval come from the
    module-level ``config`` object.
    """

    def __init__(self, rank, size, prefix_dir):
        """Initialise counters, the loss accumulator and the summary writer.

        Args:
            rank: Index of this worker; only rank 0 writes the text log.
            size: Factor applied to the per-rank speed to obtain the "Total"
                figure (presumably the number of workers -- confirm with the
                caller).
            prefix_dir: Base directory; summaries are written below
                ``<prefix_dir>/log_tensorboard/<month>_<day>_<hour>``.
        """
        self.rank = rank
        self.size = size
        self.prefix_dir = prefix_dir
        self.batch_size = config.batch_size
        self.frequent = config.frequent

        # Timing state: ``init`` stays False until the first call has
        # recorded a reference timestamp in ``tic``.
        self.init = False
        self.tic = 0
        self.last_count = 0

        self.loss_metric = MetricNdarray()

        stamp = time.localtime()
        run_dir = os.path.join(
            self.prefix_dir,
            "log_tensorboard",
            "%s_%s_%s" % (stamp.tm_mon, stamp.tm_mday, stamp.tm_hour),
        )
        self.summary_writer = SummaryWriter(logdir=run_dir, verbose=False)

    def __call__(self, param):
        """Accumulate the loss and, every ``frequent`` updates, report it.

        Args:
            param: Object exposing ``num_update``, ``num_epoch`` and an
                indexable ``loss`` whose first element is fed to the loss
                accumulator.
        """
        step = param.num_update

        # An update counter that moved backwards restarts the timing window.
        if self.last_count > step:
            self.init = False
        self.last_count = step

        self.loss_metric.update(param.loss[0])

        if not self.init:
            # First call of a window: only take the reference timestamp.
            self.init = True
            self.tic = time.time()
            return

        if step % self.frequent != 0:
            return

        # Presumably blocks until pending asynchronous work is finished so
        # the elapsed time is meaningful -- confirm against the ``nd`` module.
        nd.waitall()
        try:
            speed = self.frequent * self.batch_size / (time.time() - self.tic)
            speed_total = speed * self.size
        except ZeroDivisionError:
            speed = float('inf')
            speed_total = float('inf')

        # Loss summary; the accumulator is cleared for the next window.
        mean_loss = self.loss_metric.get()
        self.summary_writer.add_scalar(
            tag="loss", value=mean_loss, global_step=step)
        loss_text = "[%d][%s]:%.2f " % (param.num_epoch, "loss", mean_loss)
        self.loss_metric.reset()

        # Speed summary.
        self.summary_writer.add_scalar(
            tag="speed", value=speed, global_step=step)
        self.summary_writer.flush()

        if self.rank == 0:
            logging.info(
                "Iter:%d Rank:%.2f it/sec Total:%.2f it/sec %s",
                step, speed, speed_total, loss_text)

        self.tic = time.time()
class LogCallBack(object):
    """Batch-end callback that reports training speed and one loss per head.

    Every ``frequent`` updates the accumulated loss of each named head and
    the measured per-rank speed are written to a ``SummaryWriter``; rank 0
    also emits one ``logging.info`` line.
    """

    def __init__(self, batch_size, head_name_list, rank, size, prefix_dir,
                 frequent):
        """Initialise counters, per-head loss accumulators and the writer.

        Args:
            batch_size: Batch size used in the speed computation.
            head_name_list: Names of the heads; one ``MetricNdarray`` is
                created per name and the names are used in summary tags and
                in the log line.
            rank: Index of this worker; only rank 0 writes the text log.
            size: Factor applied to the per-rank speed to obtain the "Total"
                figure (presumably the number of workers -- confirm with the
                caller).
            prefix_dir: Base directory; summaries are written below
                ``<prefix_dir>/log_tensorboard/<month>_<day>_<hour>``.
            frequent: Reporting interval, in updates.
        """
        self.batch_size = batch_size
        self.rank = rank
        self.size = size
        self.prefix_dir = prefix_dir
        self.frequent = frequent

        # Timing state: ``init`` stays False until the first call has
        # recorded a reference timestamp in ``tic``.
        self.init = False
        self.tic = 0
        self.last_count = 0

        # ``logging`` iterates over the names, so they must be kept.
        self.head_name_list = head_name_list
        self.loss_metric_list = [MetricNdarray() for _ in head_name_list]

        now = time.localtime()
        self.summary_writer = SummaryWriter(
            logdir=os.path.join(
                self.prefix_dir,
                'log_tensorboard',
                '%s_%s_%s' % (now.tm_mon, now.tm_mday, now.tm_hour)),
            verbose=False)

    def __call__(self, param):
        """Forward to :meth:`logging` so the instance is usable as a callback."""
        self.logging(param)

    def logging(self, param):
        """Accumulate per-head losses and, every ``frequent`` updates, report.

        Args:
            param: Object exposing ``num_update``, an indexable ``loss_list``
                (one entry per head) and an indexable ``num_epoch_list``
                (one entry per head).
        """
        count = param.num_update

        # An update counter that moved backwards restarts the timing window.
        if self.last_count > count:
            self.init = False
        self.last_count = count

        loss_list = param.loss_list
        for idx, metric in enumerate(self.loss_metric_list):
            metric.update(loss_list[idx])

        if not self.init:
            # First call of a window: only take the reference timestamp.
            self.init = True
            self.tic = time.time()
            return

        if count % self.frequent != 0:
            return

        # Presumably blocks until pending asynchronous work is finished so
        # the elapsed time is meaningful -- confirm against the ``nd`` module.
        nd.waitall()
        try:
            speed = self.frequent * self.batch_size / (time.time() - self.tic)
            speed_total = speed * self.size
        except ZeroDivisionError:
            speed = speed_total = float('inf')

        # One summary scalar and one log fragment per head; each accumulator
        # is cleared for the next window.
        loss_parts = []
        for idx, name in enumerate(self.head_name_list):
            metric = self.loss_metric_list[idx]
            loss_scalar = metric.get()
            self.summary_writer.add_scalar(
                tag="%s_loss" % name, value=loss_scalar, global_step=count)
            loss_parts.append(
                "[%d][%s]:%.2f " % (param.num_epoch_list[idx], name,
                                    loss_scalar))
            metric.reset()
        loss_str_format = "".join(loss_parts)

        # Speed summary.
        self.summary_writer.add_scalar(
            tag="speed", value=speed, global_step=count)
        self.summary_writer.flush()

        if self.rank == 0:
            logging.info(
                "Iter:%d Rank:%.2f it/sec Total:%.2f it/sec %s",
                count, speed, speed_total, loss_str_format)

        self.tic = time.time()