class TensorBoard:
    """Thin wrapper around a tensorboardX ``SummaryWriter`` that logs each
    run into a fresh ``try{i}`` sub-directory of *path*."""

    def __init__(self, path):
        # Root directory under which the per-run sub-directories are created.
        self.path = path

    def initialize(self):
        """Create the log root and open a writer in the first unused
        ``try{i}`` sub-directory (i = 0..999)."""
        mkdir_p(self.path)
        for i in range(1000):
            save_path = os.path.join(self.path, 'try{}'.format(i))
            if not os.path.exists(save_path):
                self.writer = SummaryWriter(save_path)
                break

    def add_scalar(self, index, val, niter):
        """Log a single scalar *val* under tag *index* at step *niter*."""
        self.writer.add_scalar(index, val, niter)

    def add_scalars(self, index, group_dict, niter):
        """Log a dict of related scalars under main tag *index*.

        BUG FIX: previously delegated to ``add_scalar``, which cannot take a
        dict of values; forward to the writer's ``add_scalars`` instead.
        """
        self.writer.add_scalars(index, group_dict, niter)

    def add_image_grid(self, index, nrow, x, niter):
        """Log a batch of images *x* arranged as a grid with *nrow* columns."""
        grid = make_image_grid(x, nrow)
        self.writer.add_image(index, grid, niter)

    def add_image_single(self, index, x, niter):
        """Log a single image tensor *x*."""
        self.writer.add_image(index, x, niter)

    def add_graph(self, index, x_input, model):
        """Export *model* (traced with *x_input*) to ONNX and attach the graph.

        BUG FIX: the original referenced ``self.targ``, an attribute this
        class never defines (it belongs to the sibling ``tf_recorder``
        classes); use this class's own log root ``self.path`` instead.
        """
        proto = os.path.join(self.path, "{}.proto".format(index))
        torch.onnx.export(model, x_input, proto, verbose=True)
        self.writer.add_graph_onnx(proto)

    def export_json(self, out_file):
        """Dump all logged scalars to *out_file* as JSON."""
        self.writer.export_scalars_to_json(out_file)
class tf_recorder:
    """TensorBoard recorder that logs into a fresh ``try_{i}`` directory
    under the most recent run in ``log/``."""

    def __init__(self, config):
        # Most recent run directory = lexicographically last entry of 'log/'.
        dir_list = sorted(os.listdir('log/'))
        save_dir = 'log/' + dir_list[-1] + '/tensorboard/'
        utils.mkdir(save_dir)
        # BUG FIX: the original assigned self.targ = save_dir on every
        # iteration (the loop index was unused) and — because utils.mkdir
        # had just created save_dir — the os.path.exists check always
        # succeeded, so self.writer was never created.  Probe numbered
        # sub-directories instead, mirroring the sibling recorder classes.
        for i in range(1000):
            self.targ = os.path.join(save_dir, 'try_{}'.format(i))
            if not os.path.exists(self.targ):
                self.writer = SummaryWriter(self.targ)
                break

    def add_scalar(self, index, val, niter):
        """Log a single scalar *val* under tag *index* at step *niter*."""
        self.writer.add_scalar(index, val, niter)

    def add_scalars(self, index, group_dict, niter):
        """Log a dict of related scalars under main tag *index*.

        BUG FIX: was delegating to ``add_scalar``, which cannot accept a
        dict of values.
        """
        self.writer.add_scalars(index, group_dict, niter)

    def add_image_grid(self, index, ngrid, x, niter):
        """Log a batch of images *x* arranged as a grid with *ngrid* columns."""
        grid = utils.make_image_grid(x, ngrid)
        self.writer.add_image(index, grid, niter)

    def add_image_single(self, index, x, niter):
        """Log a single image tensor *x*."""
        self.writer.add_image(index, x, niter)

    def add_graph(self, index, x_input, model):
        """Export *model* (traced with *x_input*) to ONNX and attach the graph."""
        proto = os.path.join(self.targ, "{}.proto".format(index))
        torch.onnx.export(model, x_input, proto, verbose=True)
        self.writer.add_graph_onnx(proto)

    def export_json(self, out_file):
        """Dump all logged scalars to *out_file* as JSON."""
        self.writer.export_scalars_to_json(out_file)
class tf_recorder:
    """TensorBoard recorder that logs into a fresh ``repo/tensorboard/try_{i}``
    directory, picking the first index that does not exist yet."""

    def __init__(self):
        # Portable replacement for os.system('mkdir -p ...'): no shell,
        # works on non-POSIX platforms, and is a no-op if the dir exists.
        os.makedirs('repo/tensorboard', exist_ok=True)
        for i in range(1000):
            self.targ = 'repo/tensorboard/try_{}'.format(i)
            if not os.path.exists(self.targ):
                self.writer = SummaryWriter(self.targ)
                break

    def add_scalar(self, index, val, niter):
        """Log a single scalar *val* under tag *index* at step *niter*."""
        self.writer.add_scalar(index, val, niter)

    def add_scalars(self, index, group_dict, niter):
        """Log a dict of related scalars under main tag *index*.

        BUG FIX: was delegating to ``add_scalar``, which cannot accept a
        dict of values.
        """
        self.writer.add_scalars(index, group_dict, niter)

    def add_image_grid(self, index, ngrid, x, niter):
        """Log a batch of images *x* arranged as a grid with *ngrid* columns."""
        grid = utils.make_image_grid(x, ngrid)
        self.writer.add_image(index, grid, niter)

    def add_image_single(self, index, x, niter):
        """Log a single image tensor *x*."""
        self.writer.add_image(index, x, niter)

    def add_graph(self, index, x_input, model):
        """Export *model* (traced with *x_input*) to ONNX and attach the graph."""
        proto = os.path.join(self.targ, "{}.proto".format(index))
        torch.onnx.export(model, x_input, proto, verbose=True)
        self.writer.add_graph_onnx(proto)

    def export_json(self, out_file):
        """Dump all logged scalars to *out_file* as JSON."""
        self.writer.export_scalars_to_json(out_file)
class tf_recorder:
    """TensorBoard recorder that logs into ``<experiment_path>/tensorboard``
    taken from the experiment config ``_C``."""

    def __init__(self, _C):
        self.trg = os.path.join(_C.CKP.experiment_path, 'tensorboard')
        os.makedirs(self.trg, exist_ok=True)
        self.writer = SummaryWriter(self.trg)

    def add_scalar(self, index, val, niter):
        """Log a single scalar *val* under tag *index* at step *niter*."""
        self.writer.add_scalar(index, val, niter)

    def add_scalars(self, index, group_dict, niter):
        """Log a dict of related scalars under main tag *index*.

        BUG FIX: was delegating to ``add_scalar``, which cannot accept a
        dict of values.
        """
        self.writer.add_scalars(index, group_dict, niter)

    def add_image_grid(self, index, ngrid, x, niter):
        """Log a batch of images *x* arranged as a grid with *ngrid* columns."""
        grid = utils.make_image_grid(x, ngrid)
        self.writer.add_image(index, grid, niter)

    def add_image_single(self, index, x, niter):
        """Log a single image tensor *x*."""
        self.writer.add_image(index, x, niter)

    def add_graph(self, index, x_input, model):
        """Export *model* (traced with *x_input*) to ONNX and attach the graph.

        BUG FIX: the original referenced ``self.targ``, which this class
        never sets (``__init__`` stores the directory as ``self.trg``),
        so every call raised AttributeError.
        """
        proto = os.path.join(self.trg, "{}.proto".format(index))
        torch.onnx.export(model, x_input, proto, verbose=True)
        self.writer.add_graph_onnx(proto)

    def export_json(self, out_file):
        """Dump all logged scalars to *out_file* as JSON."""
        self.writer.export_scalars_to_json(out_file)
def create_summary_writer(model, log_dir):
    """Open a SummaryWriter on *log_dir* and, best-effort, attach the
    model graph (exported through ONNX) to it.

    Returns the writer; graph export failures due to missing onnx
    support are silently ignored.
    """
    writer = SummaryWriter(log_dir=log_dir)
    try:
        # A batch of ten MNIST-shaped inputs used only to trace the model.
        sample = Variable(torch.rand(10, 1, 28, 28))
        torch.onnx.export(model, sample, "model.proto", verbose=True)
        writer.add_graph_onnx("model.proto")
    except ImportError:
        # onnx support is optional; proceed without the graph.
        pass
    return writer
def set_summary(self, ):
    """Create a SummaryWriter, best-effort attach the model graph via an
    ONNX export, and store the writer on ``self.writer``."""
    writer = SummaryWriter()
    try:
        # Single ImageNet-shaped input (1, 3, 224, 224) used only for tracing.
        sample = Variable(torch.randn(1, 3, 224, 224))
        torch.onnx.export(self.model, sample, 'model.proto', verbose=True)
        writer.add_graph_onnx('model.proto')
    except ImportError:
        # ONNX export is optional — keep going without the graph.
        pass
    self.writer = writer
class TBCallback(TrainingCallback):
    """Training callback that logs losses, metrics, parameter histograms
    and (periodically) per-sample gradient histograms to TensorBoard."""

    def __init__(self, log_dir, input_dim=None):
        # log_dir: directory for the SummaryWriter and the exported graph.
        # input_dim: shape of a zero dummy input used to export the model
        #            graph before training; graph export is skipped when None.
        self.log_dir = log_dir
        self.input_dim = input_dim
        self.writer = SummaryWriter(log_dir)
        super().__init__()

    def before_training(self, model_trainer):
        """Export the model graph once (via ONNX) before training starts."""
        if self.input_dim is not None:
            dummy_input = cuda_move(Variable(torch.zeros(self.input_dim)))
            # BUG FIX: the original concatenated log_dir + 'onnx_model.proto',
            # which produced a malformed path whenever log_dir lacked a
            # trailing separator; os.path.join is correct either way.
            model_file = os.path.join(self.log_dir, 'onnx_model.proto')
            torch.onnx.export(model_trainer.model, dummy_input, model_file,
                              verbose=True)
            self.writer.add_graph_onnx(model_file)

    def after_epoch(self, model_trainer, train_data, validation_data):
        """Log the latest train/val loss and metric; every
        ``validation_steps`` iterations also log parameter and gradient
        histograms."""
        n_iter = model_trainer.global_step
        train_loss, train_metric = model_trainer.train_losses[
            -1], model_trainer.train_metrics[-1]
        val_loss, val_metric = model_trainer.val_losses[
            -1], model_trainer.val_metrics[-1]
        # data grouping by `slash`
        self.writer.add_scalar('data/train_loss', train_loss, n_iter)
        self.writer.add_scalar('data/train_metric', train_metric, n_iter)
        self.writer.add_scalar('data/val_loss', val_loss, n_iter)
        self.writer.add_scalar('data/val_metric', val_metric, n_iter)
        if n_iter % model_trainer.validation_steps == 0:
            for name, param in model_trainer.model.named_parameters():
                self.writer.add_histogram('param/' + name,
                                          param.clone().cpu().data.numpy(),
                                          n_iter, bins='sturges')
            self._save_gradient_histograms(model_trainer, train_data)

    def after_training(self, model_trainer):
        """Export all logged scalars to JSON and close the writer."""
        self.writer.export_scalars_to_json("./all_scalars.json")
        self.writer.close()

    def _save_gradient_histograms(self, model_trainer, train_data):
        """Accumulate per-sample gradients over up to 100 randomly chosen
        training samples and log one histogram per parameter.

        NOTE: runs a forward/backward pass per sample, so it is costly;
        it is only invoked periodically from after_epoch.
        """
        n_iter = model_trainer.global_step
        random_shuffle = list(train_data.get_one_hot_list())
        random.shuffle(random_shuffle)
        # Stash a per-parameter accumulator directly on the tensors.
        for par in model_trainer.model.parameters():
            par.accumulated_grad = []
        n_samples = 100
        for X_i, y_i in random_shuffle[:n_samples]:
            X_data, y_data = cuda_move(X_i), cuda_move(y_i)
            # TODO: backprop through thousand of time steps
            y_out = model_trainer.model.forward(X_data, logits=True)
            loss = F.binary_cross_entropy_with_logits(y_out, y_data)
            model_trainer.model.zero_grad()
            loss.backward()
            for par in model_trainer.model.parameters():
                par.accumulated_grad.append(par.grad)
        for name, par in model_trainer.model.named_parameters():
            t = torch.stack(par.accumulated_grad, 0)
            self.writer.add_histogram('grad/' + name,
                                      t.clone().cpu().data.numpy(),
                                      n_iter, bins='sturges')
            # Drop the accumulator so the tensors are not kept alive.
            par.accumulated_grad = None

    def __str__(self):
        return "TBCallback(logdir={})".format(self.log_dir)
# 写loss指标进图表 writer.add_scalar('loss', loss.data[0], step) i = i + 1 step = step + 1 print('Epoch[{}/{}], loss: {:.6f}'.format(epoch + 1, EPOCH, loss.data[0])) for name, param in cnn.named_parameters(): writer.add_histogram(name, param.clone().cpu().data.numpy(), epoch) #触发dropout防止过拟合 cnn.eval() #使用测试训练集查看该轮训练情况 for i, (x, y) in enumerate(test_data_loader): batch_x = Variable(x) batch_y = Variable(y) # 输入训练数据 output = cnn(batch_x) print('fact={},predict={}'.format( batch_y.data[0], torch.max(output, 1)[1].data.numpy().squeeze())) # 保存训练图 torch.onnx.export(cnn, batch_x, "model/model.proto", verbose=True) writer.add_graph_onnx("model/model.proto") torch.save(cnn, 'model/model.pkl')
class Summarizer(object):
    """Gated proxy around a SummaryWriter.

    Every ``add_*`` call is a no-op until ``self.report`` is set truthy,
    and falls back to ``self.global_step`` whenever the caller does not
    pass an explicit ``global_step``.
    """

    def __init__(self):
        self.report = False       # master switch: when False, all add_* are no-ops
        self.global_step = None   # fallback step used when a call omits global_step
        self.writer = None        # created lazily by initialize_writer()

    def initialize_writer(self, log_dir):
        """Create the underlying SummaryWriter logging into *log_dir*."""
        self.writer = SummaryWriter(log_dir)

    def add_scalar(self, tag, scalar_value, global_step=None, walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_scalar(tag, scalar_value,
                               global_step=global_step, walltime=walltime)

    def add_scalars(self, main_tag, tag_scalar_dict, global_step=None,
                    walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        # BUG FIX: the original passed `self` as the first positional
        # argument to the writer, shifting every argument by one.
        self.writer.add_scalars(main_tag, tag_scalar_dict,
                                global_step=global_step, walltime=walltime)

    def add_histogram(self, tag, values, global_step=None, bins='tensorflow',
                      walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        # cupy arrays must be moved to host memory before logging.
        if isinstance(values, chainer.cuda.cupy.ndarray):
            values = chainer.cuda.to_cpu(values)
        self.writer.add_histogram(tag, values, global_step=global_step,
                                  bins=bins, walltime=walltime)

    def add_image(self, tag, img_tensor, global_step=None, walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_image(tag, img_tensor,
                              global_step=global_step, walltime=walltime)

    def add_image_with_boxes(self, tag, img_tensor, box_tensor,
                             global_step=None, walltime=None, **kwargs):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_image_with_boxes(tag, img_tensor, box_tensor,
                                         global_step=global_step,
                                         walltime=walltime, **kwargs)

    def add_figure(self, tag, figure, global_step=None, close=True,
                   walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_figure(tag, figure, global_step=global_step,
                               close=close, walltime=walltime)

    def add_video(self, tag, vid_tensor, global_step=None, fps=4,
                  walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_video(tag, vid_tensor, global_step=global_step,
                              fps=fps, walltime=walltime)

    def add_audio(self, tag, snd_tensor, global_step=None, sample_rate=44100,
                  walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_audio(tag, snd_tensor, global_step=global_step,
                              sample_rate=sample_rate, walltime=walltime)

    def add_text(self, tag, text_string, global_step=None, walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_text(tag, text_string,
                             global_step=global_step, walltime=walltime)

    def add_graph_onnx(self, prototxt):
        if not self.report:
            return
        # BUG FIX: the original passed `self` as the writer's first argument.
        self.writer.add_graph_onnx(prototxt)

    def add_graph(self, model, input_to_model=None, verbose=False, **kwargs):
        if not self.report:
            return
        self.writer.add_graph(model, input_to_model=input_to_model,
                              verbose=verbose, **kwargs)

    def add_embedding(self, mat, metadata=None, label_img=None,
                      global_step=None, tag='default', metadata_header=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_embedding(mat, metadata=metadata, label_img=label_img,
                                  global_step=global_step, tag=tag,
                                  metadata_header=metadata_header)

    def add_pr_curve(self, tag, labels, predictions, global_step=None,
                     num_thresholds=127, weights=None, walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_pr_curve(tag, labels, predictions,
                                 global_step=global_step,
                                 num_thresholds=num_thresholds,
                                 weights=weights, walltime=walltime)

    def add_pr_curve_raw(self, tag, true_positive_counts,
                         false_positive_counts, true_negative_counts,
                         false_negative_counts, precision, recall,
                         global_step=None, num_thresholds=127, weights=None,
                         walltime=None):
        if not self.report:
            return
        if global_step is None and self.global_step is not None:
            global_step = self.global_step
        self.writer.add_pr_curve_raw(tag, true_positive_counts,
                                     false_positive_counts,
                                     true_negative_counts,
                                     false_negative_counts, precision, recall,
                                     global_step=global_step,
                                     num_thresholds=num_thresholds,
                                     weights=weights, walltime=walltime)

    def add_custom_scalars_multilinechart(self, tags, category='default',
                                          title='untitled'):
        if not self.report:
            return
        self.writer.add_custom_scalars_multilinechart(tags, category=category,
                                                      title=title)

    def add_custom_scalars_marginchart(self, tags, category='default',
                                       title='untitled'):
        if not self.report:
            return
        self.writer.add_custom_scalars_marginchart(tags, category=category,
                                                   title=title)

    def add_custom_scalars(self, layout):
        if not self.report:
            return
        self.writer.add_custom_scalars(layout)
def run(batch_size, val_batch_size, epochs, lr, momentum, log_interval,
        logger, log_dir):
    """Train the MNIST Net with ignite, logging progress to TensorBoard.

    Builds the data loaders, best-effort attaches the model graph to the
    writer via an ONNX export, wires up the trainer/evaluator event
    handlers, runs training and finally closes the writer.
    """
    writer = SummaryWriter(log_dir=log_dir)

    # Standard MNIST normalisation constants.
    transform = Compose([ToTensor(), Normalize((0.1307, ), (0.3081, ))])
    train_loader = DataLoader(
        MNIST(download=True, root=".", transform=transform, train=True),
        batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(
        MNIST(download=False, root=".", transform=transform, train=False),
        batch_size=val_batch_size, shuffle=False)

    model = Net()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)

    # Best-effort graph export; skipped when onnx support is unavailable.
    try:
        sample_batch = Variable(torch.rand(10, 1, 28, 28))
        torch.onnx.export(model, sample_batch, "model.proto", verbose=True)
        writer.add_graph_onnx("model.proto")
    except ImportError:
        pass

    def training_update_function(batch):
        """Run one optimisation step and return the batch loss value."""
        model.train()
        optimizer.zero_grad()
        data, target = Variable(batch[0]), Variable(batch[1])
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()
        return loss.data[0]

    def validation_inference_function(batch):
        """Return (summed NLL loss, #correct) for one validation batch."""
        model.eval()
        data, target = Variable(batch[0]), Variable(batch[1])
        output = model(data)
        loss = F.nll_loss(output, target, size_average=False).data[0]
        pred = output.data.max(1, keepdim=True)[1]
        correct = pred.eq(target.data.view_as(pred)).sum()
        return loss, correct

    trainer = Trainer(training_update_function)
    evaluator = Evaluator(validation_inference_function)

    # Trainer-side handlers: moving-average console log, training-loss
    # plotting, and a per-epoch evaluation pass.
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED, log_simple_moving_average,
        window_size=100, metric_name="NLL",
        should_log=lambda trainer: trainer.current_iteration % log_interval == 0,
        logger=logger)
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED,
        get_plot_training_loss_handler(writer, plot_every=log_interval))
    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        Evaluate(evaluator, val_loader, epoch_interval=1))

    # Evaluator-side handlers: console summary plus loss/accuracy plots.
    evaluator.add_event_handler(
        Events.COMPLETED, get_log_validation_loss_and_accuracy_handler(logger))
    evaluator.add_event_handler(
        Events.COMPLETED, get_plot_validation_loss_handler(writer), trainer)
    evaluator.add_event_handler(
        Events.COMPLETED, get_plot_validation_accuracy_handler(writer), trainer)

    # Kick everything off.
    trainer.run(train_loader, max_epochs=epochs)
    writer.close()