def train_net(self, train_loader, n_epoc): writer = SummaryWriter(comment=f'_WGAN_GP_{self.data_name}' ) # TODO to add hyper parmeters test_noise = self.generate_noise(64) n_sample = len(train_loader.dataset) for i in range(n_epoc): epoc_l_d, epoc_l_g, epoc_score_p, epoc_score_f1, epoc_score_f2 = 0., 0., 0., 0., 0. self.conv_gen.train(), self.conv_dis.train() with tqdm(total=len(train_loader), desc=f"epoc: {i + 1}") as pbar: for k, (real_img, _) in enumerate(train_loader): if IF_CUDA: real_img = real_img.cuda() d_loss, p_score, f_score1 = self.train_d_step(real_img) g_loss, f_score2 = self.train_g_step(real_img.shape[0]) batch_size = real_img.shape[0] epoc_l_d += d_loss * batch_size epoc_l_g += g_loss * batch_size epoc_score_p += p_score * batch_size epoc_score_f1 += f_score1 * batch_size epoc_score_f2 += f_score2 * batch_size pbar.set_postfix({ "d_loss": d_loss, "g_loss": g_loss, "p_score": p_score, "f_score D": f_score1, 'G': f_score2 }) pbar.update() epoc_l_d /= n_sample epoc_l_g /= n_sample epoc_score_p /= n_sample epoc_score_f1 /= n_sample epoc_score_f2 /= n_sample pbar.set_postfix({ "epoch: d_loss": epoc_l_d, "g_loss": epoc_l_g, "p_score": epoc_score_p, "f_score D": epoc_score_f1, 'G': epoc_score_f2 }) writer.add_scalar('loss/generator', epoc_l_g, i) writer.add_scalar('loss/discriminator', epoc_l_d, i) writer.add_scalar('score/real', epoc_score_p, i) writer.add_scalar('score/fake_D', epoc_score_f1, i) writer.add_scalar('score/fake_G', epoc_score_f2, i) self.conv_gen.eval(), self.conv_dis.eval() test_img = self.conv_gen(test_noise) test_img = ( test_img + 1.0) / 2.0 # Note that this is important to recover the range test_img = test_img.reshape(64, *self.img_shape) writer.add_images('img', test_img, i + 1) writer.close() return
class Visualizer():
    """Tensorboard wrapper.

    Thin facade over a ``SummaryWriter`` that logs model graphs, scalar
    values and image batches described as lists of dicts.
    """

    def __init__(self, opt):
        """Create the log directory if needed and open a writer in it.

        Args:
            opt: options object providing ``log_dir`` and ``name``.
        """
        log_dir = os.path.join(opt.log_dir, opt.name)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
            print('Directory created: %s' % log_dir)
        self.writer = SummaryWriter(log_dir)
        print('Visualiser created (log in %s)' % log_dir)

    # BUG FIX (all three methods below): mutable default arguments ([]) are
    # shared across calls; use None and substitute an empty sequence.
    def add_models(self, models=None):
        """Log model graphs; each entry needs 'model' and 'input' keys."""
        for model in (models or []):
            self.writer.add_graph(model['model'], model['input'])

    def add_values(self, values=None):
        """Log scalars; entries may carry 'name', 'x' (step) and 'y' keys."""
        for idx, value in enumerate(values or []):
            label = value['name'] if 'name' in value else str(idx)
            x = value['x'] if 'x' in value else 0
            y = value['y'] if 'y' in value else 0
            self.writer.add_scalar(label, y, x)

    def add_images(self, images=None):
        """Log image batches; 'data' is assumed in [-1, 1] and clamped to [0, 1]."""
        for idx, image in enumerate(images or []):
            if 'data' not in image:
                continue
            img = image['data']
            img = (img + 1.) * 0.5  # [-1, 1] -> [0, 1]
            img[img < 0] = 0
            img[img > 1] = 1
            label = image['name'] if 'name' in image else str(idx)
            step = image['step'] if 'step' in image else 0
            self.writer.add_images(label, img, step)
class Visualizer:
    """Convenience facade over a TensorBoard ``SummaryWriter``.

    Every logging call stamps the event with the current wall-clock time.
    """

    def __init__(self, log_dir):
        self.log_dir = log_dir
        self.writer = SummaryWriter(log_dir)

    def add_losses(self, tag, losses, epoch):
        """Log each named loss in the ``losses`` dict under ``tag/<name>``."""
        for name, loss_value in losses.items():
            self.writer.add_scalar(f"{tag}/{name}", loss_value, epoch,
                                   walltime=time.time())

    def add_images(self, tag, imgs, epoch, nrow=5, dataformats='NCHW'):
        """Log a batch of images (``nrow`` kept for API compatibility)."""
        self.writer.add_images(tag, imgs, epoch, walltime=time.time(),
                               dataformats=dataformats)

    def add_image(self, tag, img, epoch, dataformats='CHW'):
        """Log a single image, dropping a leading singleton batch dimension."""
        self.writer.add_image(tag, img.squeeze(0), epoch, walltime=time.time(),
                              dataformats=dataformats)

    def add_histogram(self, tag, val, epoch):
        """Log a histogram of ``val``."""
        self.writer.add_histogram(tag, val, epoch, walltime=time.time())

    def blend_heatmap(self, img, heatmap):
        """Blend ``heatmap`` onto ``img`` and return the result as a PIL image."""
        detached_img = img.cpu().detach()
        detached_map = heatmap.cpu().detach()
        composite = blend(detached_img, detached_map)
        return torchvision.transforms.ToPILImage()(composite)
def saveDataloader(self, trainloader, idx=0):
    """Log one (input, ground-truth) batch from ``trainloader`` to TensorBoard.

    Iterates until batch ``idx``, rearranges the tensors to channels-last,
    rescales from [0, 255] to [0, 1] and writes both image grids into a
    'Sample_data' run under ``self.logdir``.

    Args:
        trainloader: iterable of sample dicts with 'inputs' and 'gt' tensors.
        idx: index of the batch to log (default: the first batch).
    """
    for i_batch, sample in enumerate(trainloader):
        # Stop at the requested batch; `sample` keeps that batch afterwards.
        if (i_batch == idx):
            break
    input, gt_tensor = sample['inputs'], sample['gt']
    if (self.dim5D):
        # (b, c, t, h, w) -> (t, b, h, w, c); keep only the first time step.
        input_tensor = input.permute(2, 0, 3, 4, 1)[0]
        gt_tensor = gt_tensor.permute(2, 0, 3, 4, 1)[0]
    else:
        # (b, c, h, w) -> (b, h, w, c)
        input_tensor = input.permute(0, 2, 3, 1)
        gt_tensor = gt_tensor.permute(0, 2, 3, 1)
    # normalize image — assumes pixel values in [0, 255]; TODO confirm
    input_tensor = torch.div(input_tensor, 255)
    gt_tensor = torch.div(gt_tensor, 255)
    file_writer = SummaryWriter(self.logdir + '/Sample_data')
    file_writer.add_images("Groundtruth images", gt_tensor, dataformats='NHWC')
    file_writer.add_images("Input images", input_tensor, dataformats='NHWC')
    file_writer.close()
def main(name_dataset='voc', batch_size=8, model="unet34", num_classes=256,
         pretrained=True, resume=None, lr=0.001, size=448, epochs=10,
         dir_ckp='./ckp', debug=False):
    """Train a segmentation model end to end.

    Builds train/val dataloaders with augmentation, optionally resumes from a
    checkpoint, logs examples and metrics to TensorBoard and saves a
    checkpoint after every epoch.

    Args:
        name_dataset: dataset key understood by ``get_dataset``.
        batch_size: samples per batch for both loaders.
        model: model key understood by ``get_model_torchsat``.
        num_classes: number of output classes.
        pretrained: whether to start from pretrained weights.
        resume: optional path of a state-dict checkpoint to load.
        lr: initial Adam learning rate.
        size: square crop size fed to the network.
        epochs: number of training epochs.
        dir_ckp: directory where per-epoch checkpoints are written.
        debug: force CPU execution when True.
    """
    torch.backends.cudnn.benchmark = True
    if debug:
        device = 'cpu'
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # transform
    train_transform = T_seg.Compose([
        T_seg.RandomCrop(size),  # <- should be match to U-Net or other networks' size
        T_seg.RandomHorizontalFlip(),
        T_seg.RandomVerticalFlip(),
        T_seg.ToTensor(),
        T_seg.Normalize(),
    ])
    val_transform = T_seg.Compose([
        T_seg.ToTensor(),
        T_seg.Normalize(),
    ])
    train_data = get_dataset(name=name_dataset, mode="train", transform=train_transform)
    val_data = get_dataset(name=name_dataset, mode="val", transform=val_transform)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
    print(train_data[1])  # idiomatic item access instead of __getitem__(1)
    # images, labels = next(iter(train_loader))

    # model
    model = get_model_torchsat(model, num_classes, pretrained=pretrained)
    model.to(device)
    if resume is not None:
        model.load_state_dict(torch.load(resume, map_location=device))

    # loss
    criterion = nn.CrossEntropyLoss().to(device)

    # optim and lr scheduler
    optimizer = optim.Adam(model.parameters(), lr=lr)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    print(model)
    print(summary(model, input_size=(3, size, size), device=device))

    mean, std = calc_normalization(train_loader)
    # (removed a dead duplicate `normalization = {}` assignment)
    normalization = {'mean': mean, 'std': std}

    writer = SummaryWriter(log_dir="./logs")
    # display some examples in tensorboard
    images, labels = next(iter(train_loader))
    originals = images * std.view(3, 1, 1) + mean.view(3, 1, 1)
    writer.add_images('images/original', originals, 0)
    writer.add_images('images/normalized', images, 0)
    writer.add_graph(model, images.to(device))

    # BUG FIX: make sure the checkpoint directory exists before the first save.
    os.makedirs(dir_ckp, exist_ok=True)
    for epoch in range(epochs):
        # BUG FIX: get_last_lr() is the supported accessor; get_lr() is
        # deprecated and returns skewed values when called mid-schedule.
        writer.add_scalar('train/lr', lr_scheduler.get_last_lr()[0], epoch)
        train_epoch(train_loader, model, criterion, optimizer, epoch, device,
                    writer, show_progress=True)
        evaluate_epoch(val_loader, model, criterion, epoch, writer)
        lr_scheduler.step()
        torch.save(model.state_dict(),
                   os.path.join(dir_ckp, 'cls_epoch_{}.pth'.format(epoch)))
class Logger:
    """TensorBoard logging helper keyed by (epoch, batch) training steps."""

    def __init__(self, model_id, experiment_id):
        self.model_id = model_id
        self.writer = SummaryWriter(os.path.join('runs', experiment_id, model_id))
        # Global-step offset, useful when resuming a run.
        self.offset = 0

    def log_scalars(self, tag, tag_value_dict, epoch, n_batch, num_batches):
        """Log a dict of named scalars at the derived global step."""
        step = Logger._step(epoch, n_batch, num_batches, self.offset)
        self.writer.add_scalars(main_tag=tag, tag_scalar_dict=tag_value_dict,
                                global_step=step)

    def log_histogram(self, tag, values, epoch, n_batch, num_batches):
        """Log a histogram of the L2 norms of the entries in ``values``.

        Entries may be tensors or None; None entries contribute 0.
        """
        step = Logger._step(epoch, n_batch, num_batches, self.offset)
        # BUG FIX: use `is not None` — `not v == None` broadcasts elementwise
        # on tensors and raises for tensors with more than one element.
        values = numpy.array([(v ** 2).sum().sqrt() if v is not None else 0
                              for v in values])
        self.writer.add_histogram(tag=tag, values=values, global_step=step)

    def gradient_frames(self, tag, gradient, epoch, n_batch, num_batches):
        """Log gradient visualizations as an image batch."""
        step = Logger._step(epoch, n_batch, num_batches, self.offset)
        self.writer.add_images(tag=tag, img_tensor=gradient, global_step=step)

    def save_images(self, tag, image, epoch, n_batch, num_batches):
        """Log an image batch at the derived global step."""
        step = Logger._step(epoch, n_batch, num_batches, self.offset)
        self.writer.add_images(tag=tag, img_tensor=image, global_step=step)

    def save_model(self, model, images):
        """Log the model graph traced with ``images``."""
        self.writer.add_graph(model, images)

    def set_offset(self, offset):
        self.offset = offset

    @staticmethod
    # If offset > 0, epoch 0 is of variable length and is included in offset.
    def _step(epoch, n_batch, num_batches, offset=0):
        return offset + n_batch + (epoch - (offset > 0)) * num_batches
class TensorboardLogger(Logger):
    """Logger backend that forwards training events to TensorBoard."""

    def __init__(self, log_interval=50, validation_interval=200,
                 generate_interval=500, trainer=None, generate_function=None,
                 log_dir='logs'):
        super().__init__(log_interval, validation_interval, generate_interval,
                         trainer, generate_function)
        self.writer = SummaryWriter(log_dir)

    def log_loss(self, current_step):
        # loss accumulated by the base class over the last interval
        avg_loss = self.accumulated_loss / self.log_interval
        self.writer.add_scalar('loss', avg_loss, current_step)

    def validate(self, current_step):
        """Run a validation pass via the trainer and log loss/accuracy."""
        avg_loss, avg_accuracy = self.trainer.validate()
        self.writer.add_scalar('validation/loss', avg_loss, current_step)
        self.writer.add_scalar('validation/accuracy', avg_accuracy, current_step)

    def log_audio(self, step):
        """Generate an audio sample and log it at 16 kHz."""
        samples = self.generate_function()
        self.writer.add_audio('audio sample', samples, step, sample_rate=16000)

    def image_summary(self, tag, images, step):
        """Log a list of images."""
        self.writer.add_images(tag, images, step)

    def audio_summary(self, tag, sample, step, sr=16000):
        """Log an audio clip.

        BUG FIX: ``step`` was accepted but never forwarded, so every clip was
        written at the default global step; pass it through now.
        """
        self.writer.add_audio(tag, sample, step, sample_rate=sr)
class TensorboardLogger:
    """
    Log metrics to TensorBoard.

    Scalars and one-element tensors are logged as plots, 2D/3D/4D tensors as
    images, and strings as text entries.

    Args:
        log_dir (str): path of the loging directory. Default to
            runs/CURRENT_DATETIME_HOSTNAME
        log_every (int): batch logging freq. -1 logs on epoch ends only.
        prefix (str): prefix for all metrics name
        post_epoch_ends (bool): whether to also log when an epoch ends.
    """

    def __init__(self, log_dir=None, log_every=10, prefix='', post_epoch_ends=True):
        assert HAS_TS, ("Can't import Tensorboard. Some callbacks will not "
                        "work properly")
        self.writer = SummaryWriter(log_dir=log_dir)
        self.log_every = log_every
        self.prefix = prefix
        self.post_epoch_ends = post_epoch_ends

    def on_batch_start(self, state):
        step = state['iters']
        # Tell downstream callbacks whether this iteration will be logged.
        state['visdom_will_log'] = (self.log_every != -1
                                    and step % self.log_every == 0)

    @torch.no_grad()
    def on_batch_end(self, state):
        step = state['iters']
        if self.log_every == -1:
            return
        if step % self.log_every == 0:
            self.log(step, state['metrics'])

    def on_epoch_end(self, state):
        if self.post_epoch_ends:
            self.log(state['iters'], state['metrics'])

    def log(self, iters, xs, store_history=[]):
        """Dispatch each metric to the writer according to its runtime type."""
        for raw_name, value in xs.items():
            name = self.prefix + raw_name
            if isinstance(value, (float, int)):
                self.writer.add_scalar(name, value, iters)
            elif isinstance(value, str):
                self.writer.add_text(name, value, iters)
            elif isinstance(value, torch.Tensor):
                self._log_tensor(name, value, iters)
            else:
                assert False, "incorrect type {} for key {}".format(
                    value.__class__.__name__, name)

    def _log_tensor(self, name, value, iters):
        # One-element tensors are scalars regardless of rank; otherwise
        # dispatch on dimensionality.
        if value.numel() == 1:
            self.writer.add_scalar(name, value, iters)
        elif value.dim() == 2:
            self.writer.add_image(name, value, iters, dataformats='HW')
        elif value.dim() == 3:
            self.writer.add_image(name, value, iters, dataformats='CHW')
        elif value.dim() == 4:
            self.writer.add_images(name, value, iters, dataformats='NCHW')
        else:
            assert False, "incorrect tensor shape {} for {}".format(
                repr(value.shape), name)
def train(self, train_loader, test_loader, n_epoch, g_step, d_step):
    """Train the GAN for ``n_epoch`` epochs.

    Logs per-epoch average losses/scores and a fixed 64-sample image grid to
    TensorBoard.

    Args:
        train_loader: DataLoader of (image, label) batches; images are
            flattened to 784 features.
        test_loader: unused here; kept for interface compatibility.
        n_epoch: number of epochs.
        g_step: generator updates per batch.
        d_step: discriminator updates per batch.
    """
    writer = SummaryWriter()
    # Fixed noise so sample grids are comparable between epochs.
    test_noise = torch.randn(64, self.dim_noise, device="cuda:0")
    for i in range(n_epoch):
        # BUG FIX: the accumulators used to be initialized once, outside the
        # epoch loop, so every epoch's "average" was contaminated by the
        # already-averaged values of earlier epochs. Reset them per epoch.
        epoch_loss_d = 0.
        epoch_loss_g = 0.
        epoch_score_p = 0.
        epoch_score_f = 0.
        if CLOSE_DROPOUT:
            self.generator.eval()
            self.discriminator.eval()
        else:
            self.generator.train()
            self.discriminator.train()
        with tqdm(total=len(train_loader), desc=f"epoc{i}") as pbar:
            for k, (data_real, lbl) in enumerate(train_loader):
                data_real = data_real.reshape(-1, 784)
                data_real = data_real.cuda()
                d_loss, p_score, f_score = self.train_discriminator(
                    data_real, d_step)
                g_loss = self.train_generator(g_step, data_real.shape[0])
                epoch_loss_d += d_loss
                epoch_loss_g += g_loss
                epoch_score_f += f_score
                epoch_score_p += p_score
                pbar.set_postfix({
                    "d_loss": d_loss,
                    "g_loss": g_loss,
                    "p_score": p_score,
                    "f_score": f_score
                })
                pbar.update()
            # Per-batch averages for the epoch (k is the last batch index).
            epoch_loss_g = epoch_loss_g / (k + 1)
            epoch_loss_d = epoch_loss_d / (k + 1)
            epoch_score_p /= k + 1
            epoch_score_f /= k + 1
            pbar.set_postfix({
                "epoch: d_loss": epoch_loss_d,
                "g_loss": epoch_loss_g,
                "p_score": epoch_score_p,
                "f_score": epoch_score_f
            })
        writer.add_scalar('loss/generator', epoch_loss_g, i)
        writer.add_scalar('loss/discriminator', epoch_loss_d, i)
        writer.add_scalar('score/real', epoch_score_p, i)
        writer.add_scalar('score/fake', epoch_score_f, i)
        # self.generator.eval() //TODO to recover
        # self.discriminator.eval()
        test_img = self.generator(test_noise)
        test_img = (test_img + 1.0) / 2.0  # map [-1, 1] -> [0, 1]
        test_img = test_img.reshape(-1, 1, 28, 28)
        writer.add_images('img', test_img, i)
    writer.close()  # flush pending events; the writer was never closed before
    return
def sample_image(writer: SummaryWriter, samples_per_class: int, iterations: int):
    """Generate a batch of conditioned images and log them under 'gan_grid'.

    Draws ``samples_per_class * n_classes`` latent vectors; each label value
    in ``range(samples_per_class)`` is repeated ``n_classes`` times.
    NOTE(review): the label values range over samples_per_class rather than
    n_classes — confirm this pairing is intended.
    """
    batch = samples_per_class * n_classes
    z = torch.randn(batch, opt.latent_dim).to(device)
    class_ids = np.repeat(np.arange(samples_per_class), n_classes)
    labels = torch.Tensor(class_ids).to(device)
    gen_imgs = generator(z, labels)
    writer.add_images('gan_grid', gen_imgs, iterations)
def train(opt):
    """Fine-tune a ResNet on CIFAR-10, starting from 'checkpoint.pt'.

    Logs loss and input images to TensorBoard every step, periodically
    evaluates on the test set, and overwrites 'checkpoint.pt' every epoch.

    Args:
        opt: options object providing ``epoch`` and ``display_step``.
    """
    model = ResNet_CIFAR10(9).cuda()
    model.load_state_dict(torch.load('checkpoint.pt'))
    model.train()
    train_dataset = CIFAR10Dataset('train')
    train_data_loader = CIFAR10Dataloader('train', opt, train_dataset)
    test_dataset = CIFAR10Dataset('test')
    test_data_loader = CIFAR10Dataloader('test', opt, test_dataset)
    optim = torch.optim.Adam(model.parameters(), lr=0.0001)
    criterion = Loss()
    writer = SummaryWriter()
    for epoch in range(opt.epoch):
        for i in range(len(train_data_loader.data_loader)):
            step = epoch * len(train_data_loader.data_loader) + i + 1
            # load data
            image, label = train_data_loader.next_batch()
            image = image.cuda()
            label = label.cuda()
            # train model
            optim.zero_grad()
            result = model(image)
            loss = criterion(result, label)
            loss.backward()
            optim.step()
            writer.add_scalar('loss', loss, step)
            writer.add_images('image', image, step, dataformats="NCHW")
            if step % opt.display_step == 0:
                _, predicted = torch.max(result, 1)
                total = label.size(0)
                correct = (predicted == label).sum().item()
                total_test = 0
                correct_test = 0
                # Evaluate without building autograd graphs.
                # NOTE(review): the model stays in train() mode here, so
                # BatchNorm running stats keep updating during evaluation —
                # left unchanged to preserve reported numbers.
                with torch.no_grad():
                    for j in range(len(test_data_loader.data_loader)):
                        image, label = test_data_loader.next_batch()
                        image = image.cuda()
                        label = label.cuda()
                        result = model(image)
                        _, predicted = torch.max(result, 1)
                        total_test += label.size(0)
                        correct_test += (predicted == label).sum().item()
                print(
                    '[Epoch {}] Loss : {:.2}, train_acc : {:.2}, test_acc : {:.2}'
                    .format(epoch, loss, correct / total,
                            correct_test / total_test))
        torch.save(model.state_dict(), 'checkpoint.pt')
    # BUG FIX: close() used to be called inside the batch loop, which closed
    # the event file after the very first step; close once after training.
    writer.close()
def sample(generator, latent_dim):
    """Draw 16 latent vectors, run the generator, and log the images.

    Writes the generated batch to a fresh timestamped TensorBoard run.
    """
    n_samples = 16
    noise = torch.randn(n_samples, latent_dim, 1, 1)
    images = generator(noise)
    print(images.shape)
    run_writer = SummaryWriter(
        log_dir=f'/home/ubuntu/ai-core/runs/Generated-{time()}')
    print('adding imgs')
    run_writer.add_images('Generated/Loss/Gen', images)
def tensorboard_rerenders(writer: SummaryWriter, number_validation_images,
                          rerender_images, ground_truth_images, step,
                          ray_warps=None):
    """Publish validation rerenders (and optional ray-warp maps) to TensorBoard.

    Logs the full batch of rerendered images, then builds a side-by-side
    matplotlib figure comparing ground truth vs. rerender — plus a warp
    intensity column with a colorbar when ``ray_warps`` is given — for the
    first ``number_validation_images`` images.

    Args:
        writer: open SummaryWriter.
        number_validation_images: how many comparison rows to draw; clamped
            to the number of available images.
        rerender_images: array of shape (N, H, W, 3); channel order appears
            to be BGR and is reversed before logging — TODO confirm.
        ground_truth_images: matching ground-truth images, same layout.
        step: global step, used both inside the tag and as the step value.
        ray_warps: optional per-image warp-intensity maps.
    """
    # NOTE(review): the step is baked into the tag, so each step creates a
    # brand-new image tag instead of a step slider on one tag — confirm.
    writer.add_images('{} all validation images'.format(step),
                      rerender_images[..., ::-1].transpose((0, 3, 1, 2)),
                      step)
    if number_validation_images > len(rerender_images):
        print(
            'there are only ', len(rerender_images),
            ' in the validation directory which is less than the specified number_validation_images: ',
            number_validation_images, ' So instead ', len(rerender_images),
            ' images are sent to tensorboard')
        number_validation_images = len(rerender_images)
    else:
        rerender_images = rerender_images[:number_validation_images]
    if number_validation_images > 0:
        # Two columns (GT, rerender) or three when warps are provided.
        if ray_warps is not None:
            image_col = 3
        else:
            image_col = 2
        fig, axarr = plt.subplots(number_validation_images,
                                  image_col,
                                  sharex=True,
                                  sharey=True)
        # With a single row, subplots returns a 1-D axis array; normalize to 2-D.
        if len(axarr.shape) == 1:
            axarr = axarr[None, :]
        for i in range(number_validation_images):
            # Channel reversal: the images appear to be stored BGR and are
            # flipped to RGB for matplotlib — TODO confirm source order.
            axarr[i, 0].imshow(ground_truth_images[i][:, :, ::-1])
            axarr[i, 0].axis('off')
            axarr[i, 1].imshow(rerender_images[i][:, :, ::-1])
            axarr[i, 1].axis('off')
            if ray_warps is not None:
                w = axarr[i, 2].imshow(ray_warps[i])
                axarr[i, 2].axis('off')
                # Attach a colorbar beside the warp plot without disturbing
                # the currently active axes.
                last_axes = plt.gca()
                ax = w.axes
                fig = ax.figure
                divider = make_axes_locatable(ax)
                cax = divider.append_axes("right", size="5%", pad=0.05)
                fig.colorbar(w, cax=cax)
                plt.sca(last_axes)
        axarr[0, 0].set_title('Ground Truth')
        axarr[0, 1].set_title('Rerender')
        if ray_warps is not None:
            axarr[0, 2].set_title('Warp Intensity')
        fig.set_dpi(300)
        writer.add_figure(str(step) + ' validation images', fig, step)
        plt.close()
def train(config):
    """Train the patch-classification model described by ``config``.

    Builds the patch dataset/loader, runs ``config.epoch`` epochs, logs the
    interval-averaged training loss every ``config.rep_intv`` iterations,
    validates at the same cadence, checkpoints the best model by validation
    accuracy, and optionally logs the raw patches.
    """
    patches = patch_dataset.PatchDataset(config, apply_color_jitter=True)
    print('{} mode and there are {} patches...'.format(config.mode,
                                                       str(len(patches))))
    data_loader = torch.utils.data.DataLoader(patches,
                                              batch_size=config.batch_size)
    model = models.DeepModel(config, is_eval=False)
    writer = SummaryWriter(log_dir=os.path.join(config.log_dir, model.name()))
    # NOTE(review): SummaryWriter above already creates its own run dir;
    # these checks only cover the parent log/save directories.
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    if not os.path.exists(config.save_dir):
        os.makedirs(config.save_dir)
    iter_idx = -1
    max_val_acc = float('-inf')  # best validation accuracy seen so far
    intv_loss = 0  # loss accumulated since the last reporting interval
    for epoch in range(config.epoch):
        prefix = 'Training Epoch {:3d}: '.format(epoch)
        for data in tqdm(data_loader, desc=prefix):
            iter_idx += 1
            train_data, train_labels, orig_patch = data
            pred_labels_logits, pred_labels_probs = model.forward(train_data)
            model.optimize_parameters(pred_labels_logits, train_labels)
            intv_loss = intv_loss + model.get_current_errors()
            if iter_idx % config.rep_intv == 0:
                writer.add_scalar('Training CrossEntropyLoss',
                                  intv_loss / config.rep_intv,
                                  global_step=iter_idx)
                intv_loss = 0
                # Validate at the reporting cadence; keep the best checkpoint.
                val_acc = model.eval(eval_data_ids=patches.eval_data_ids)
                if max_val_acc < val_acc:
                    max_val_acc = val_acc
                    model.save_state(model_id='max_val_acc')
                writer.add_scalar('Validation Accuracy', val_acc,
                                  global_step=iter_idx)
                if config.log_patches:
                    concat_patches = postprocess.hori_concat_img(
                        orig_patch.numpy().transpose(0, 3, 1, 2))
                    # NOTE(review): dataformats='HWC' implies a single 3-D
                    # image — confirm hori_concat_img returns HWC.
                    writer.add_images('Patches', concat_patches,
                                      global_step=iter_idx, dataformats='HWC')
def test(net, testloader, dataset_name,):
    """Evaluate the final checkpoint for ``dataset_name`` and log the results.

    Loads the '<dataset>_checkpoints_final.pth' snapshot, computes the test
    metrics, and writes losses, F1 score and a sample image grid to
    TensorBoard.
    """
    model_dir_header = net.get_identifier()
    checkpoint_dir = os.path.join('checkpoints', model_dir_header)
    final_checkpoint = os.path.join(
        checkpoint_dir, '{}_checkpoints_final.pth'.format(dataset_name))
    net = torch.load(final_checkpoint)
    net.cuda()
    writer = SummaryWriter(os.path.join('logs', model_dir_header))
    test_loss, test_f1_score, test_focal_loss, test_dice_loss, samples = \
        metrics(net, testloader, 0)
    writer.add_scalars('Test/Metrics',
                       {'focal_loss': test_focal_loss,
                        'dice_loss': test_dice_loss}, 0)
    writer.add_scalar('Test/Metrics/loss', test_loss, 0)
    writer.add_scalar('Test/Metrics/f1_score', test_f1_score, 0)
    grid = images_display.join_image_batches(samples)
    writer.add_images('Test_sample', grid, 0, dataformats='HWC')
class TFLogger(object):
    """Minimal TensorBoard facade writing into a timestamped run directory."""

    def __init__(self, log_dir):
        run_suffix = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
        self.writer = SummaryWriter(log_dir + run_suffix)

    def scalar_summary(self, name, value, step):
        """Log a scalar"""
        self.writer.add_scalar(name, value, step)

    def images_summary(self, name, images, step):
        """Log a list of images.

        Inputs:
            images: torch tensor or numpy array
        """
        self.writer.add_images(name, images, step)
class VisualizerTensorboard:
    """Routes typed values to the matching ``SummaryWriter`` ``add_*`` call.

    Keys are first declared via :meth:`register` with a ``dtype`` string;
    :meth:`update` then dispatches each value according to its declared type,
    using an internally incremented iteration counter as the global step.
    """

    def __init__(self, opts):
        self.dtype = {}       # key -> declared dtype string
        self.iteration = 1    # global step used for every logged value
        self.writer = SummaryWriter(opts.logs_dir)

    def register(self, modules):
        # here modules are assumed to be a dictionary
        for key in modules:
            self.dtype[key] = modules[key]['dtype']

    def update(self, modules):
        """Log one batch of values; accepts a dict or (key, value) pairs.

        Raises:
            Exception: when a key's registered dtype is not supported.
        """
        # BUG FIX: `for key, value in modules` iterated only the KEYS of a
        # dict and crashed unpacking them; accept both a mapping and an
        # iterable of (key, value) pairs.
        items = modules.items() if isinstance(modules, dict) else modules
        for key, value in items:
            if self.dtype[key] == 'scalar':
                self.writer.add_scalar(key, value, self.iteration)
            elif self.dtype[key] == 'scalars':
                self.writer.add_scalars(key, value, self.iteration)
            elif self.dtype[key] == 'histogram':
                self.writer.add_histogram(key, value, self.iteration)
            elif self.dtype[key] == 'image':
                self.writer.add_image(key, value, self.iteration)
            elif self.dtype[key] == 'images':
                self.writer.add_images(key, value, self.iteration)
            elif self.dtype[key] == 'figure':
                self.writer.add_figure(key, value, self.iteration)
            elif self.dtype[key] == 'video':
                self.writer.add_video(key, value, self.iteration)
            elif self.dtype[key] == 'audio':
                self.writer.add_audio(key, value, self.iteration)
            elif self.dtype[key] == 'text':
                self.writer.add_text(key, value, self.iteration)
            elif self.dtype[key] == 'embedding':
                # BUG FIX: add_embedding takes the matrix first and the tag
                # as a keyword; the original passed the tag as the matrix.
                self.writer.add_embedding(value, global_step=self.iteration,
                                          tag=key)
            elif self.dtype[key] == 'pr_curve':
                # BUG FIX: the writer method is add_pr_curve(tag, labels,
                # predictions, global_step); `pr_curve` does not exist.
                self.writer.add_pr_curve(key, value['labels'],
                                         value['predictions'], self.iteration)
            elif self.dtype[key] == 'mesh':
                # BUG FIX: was add_audio — a copy-paste error.
                self.writer.add_mesh(key, value, global_step=self.iteration)
            elif self.dtype[key] == 'hparams':
                # BUG FIX: add_hparams(hparam_dict, metric_dict) takes no
                # leading tag argument.
                self.writer.add_hparams(value['hparam_dict'],
                                        value['metric_dict'])
            else:
                raise Exception(
                    'Data type not supported, please update the visualizer plugin and rerun !!'
                )
        self.iteration = self.iteration + 1
class TensorboardLogger(object):
    """Logger class to graphical summary in tensorboard"""

    def __init__(self, log_dir):
        # Writer will output to ./runs/ directory by default
        self.writer = SummaryWriter(log_dir)

    def write_image_batch(self, title, img_batch, step=0):
        """Log a batch of images under ``title``."""
        self.writer.add_images(title, img_batch, step)

    def add_scalar(self, tag, value, step):
        """Log a single scalar value."""
        self.writer.add_scalar(tag, value, step)

    def add_scalars(self, tag, value_dict, step):
        """Log a dict of related scalars under one main tag."""
        self.writer.add_scalars(tag, value_dict, step)

    def close(self):
        """Flush and close the underlying writer."""
        self.writer.close()
def Summarize(outdir, step, **kwargs):
    """Write each keyword tensor to TensorBoard as a scalar or image batch.

    Single-element tensors become scalars; NCHW batches with 1 or 3 channels
    become images; everything else is currently skipped.

    Args:
        outdir: directory for the event files.
        step: global step to log under.
        **kwargs: name -> tensor pairs (anything with .cpu().detach().numpy()).
    """
    writer = SummaryWriter(outdir)
    try:
        for name, tensor in kwargs.items():
            arr = tensor.cpu().detach().numpy()
            if arr.size == 1:
                writer.add_scalar(name, arr, step)
            # BUG FIX: guard ndim before indexing shape[1] — a 1-D array with
            # more than one element used to raise IndexError here.
            elif arr.ndim < 2 or (arr.shape[1] != 3 and arr.shape[1] != 1):
                # v = np.transpose(v, [0, 2, 3, 1])
                # writer.add_images(name, arr, step)
                # TODO split into separate channels
                pass
            else:
                # v = np.transpose(v, [0, 2, 3, 1])
                writer.add_images(name, arr, step)
    finally:
        # BUG FIX: close the per-call writer so event-file handles are not
        # leaked on every invocation. (Unused `cast_kwargs` also removed.)
        writer.close()
def fit(net, trainloader, validationloader, dataset_name, epochs=1000, lower_learning_period=10):
    """Train a segmentation network with checkpointing and TensorBoard logging.

    Resumes from 'checkpoints/<id>/' when present. Tracks the best (highest)
    validation F1 score, halves the learning rate after
    ``lower_learning_period`` epochs without improvement, and saves the model
    and training configuration every epoch.

    Returns:
        The best validation F1 score observed.
    """
    model_dir_header = net.get_identifier()
    chp_dir = os.path.join('checkpoints', model_dir_header)
    checkpoint_name_path = os.path.join(chp_dir, '{}_checkpoints.pth'.format(dataset_name))
    checkpoint_conf_path = os.path.join(chp_dir, '{}_configuration.json'.format(dataset_name))
    train_config = TrainingConfiguration()
    # Resume: load both the serialized model and the training configuration.
    if os.path.exists(chp_dir):
        net = torch.load(checkpoint_name_path)
        train_config.load(checkpoint_conf_path)
    net.cuda()
    summary(net, (3, 512, 512))
    writer = SummaryWriter(os.path.join('logs', model_dir_header))
    for epoch in range(train_config.epoch, epochs):
        # --- training pass ---
        loss, f1_score, focal_loss, dice_loss, samples = fit_epoch(net, trainloader, train_config.learning_rate, epoch=epoch)
        writer.add_scalars('Train/Metrics', {'focal_loss': focal_loss, 'dice_loss':dice_loss}, epoch)
        writer.add_scalar('Train/Metrics/loss', loss, epoch)
        writer.add_scalar('Train/Metrics/f1_score', f1_score, epoch)
        grid = images_display.join_image_batches(samples)
        writer.add_images('train_sample', grid, epoch, dataformats='HWC')
        # --- validation pass ---
        val_loss, val_f1_score, val_focal_loss, val_dice_loss, samples = metrics(net, validationloader, epoch)
        writer.add_scalars('Validation/Metrics', {'focal_loss': val_focal_loss, 'dice_loss':val_dice_loss}, epoch)
        writer.add_scalar('Validation/Metrics/loss', val_loss, epoch)
        writer.add_scalar('Validation/Metrics/f1_score', val_f1_score, epoch)
        grid = images_display.join_image_batches(samples)
        writer.add_images('validation_sample', grid, epoch, dataformats='HWC')
        os.makedirs((chp_dir), exist_ok=True)
        # Keep the best model by validation F1; otherwise age the LR schedule.
        if train_config.best_metric < val_f1_score:
            train_config.iteration_age = 0
            train_config.best_metric = val_f1_score
            print('Epoch {}. \nSaving model with metric: {}'.format(epoch, val_f1_score))
            torch.save(net, checkpoint_name_path.replace('.pth', '_final.pth'))
        else:
            train_config.iteration_age+=1
            print('Epoch {} metric: {}'.format(epoch, val_f1_score))
        # Halve the LR after `lower_learning_period` epochs without improvement.
        if train_config.iteration_age==lower_learning_period:
            train_config.learning_rate*=0.5
            train_config.iteration_age=0
            print("Learning rate lowered to {}".format(train_config.learning_rate))
        train_config.epoch = epoch+1
        train_config.save(checkpoint_conf_path)
        torch.save(net, checkpoint_name_path)
        torch.save(net.state_dict(), checkpoint_name_path.replace('.pth', '_state_dict.pth'))
    print('Finished Training')
    return train_config.best_metric
def train(max_iter=50000):
    """Train the unconditional MNIST normalizing-flow model.

    Minimizes the negative log-likelihood of the latent under a standard
    normal plus the per-pixel log-determinant term; logs loss, input images
    and reverse-sampled images every 50 steps.

    Args:
        max_iter: total number of optimization steps.
    """
    reader = Reader()
    model = MNISTModel(5e-4)
    # noinspection PyUnresolvedReferences
    model = model.cuda()
    # NOTE(review): milestones count scheduler.step() calls, and step() is
    # only invoked every 230 iterations below, so the decays land around
    # iterations 4600 and 9200, not 20/40 — confirm this is intended.
    scheduler = th.optim.lr_scheduler.MultiStepLR(model.optimizer,
                                                  milestones=[20, 40],
                                                  gamma=0.1)
    time_string = strftime("%a%d%b%Y-%H%M%S", gmtime())
    writer = SummaryWriter('./result/mnist2/log/' + 'no_cond' + time_string)
    for i in range(max_iter):
        x, l = reader.get_batch_tensor()
        x = reader.augmentation(x)
        z, log_j = model(x, l, one_hot_l=True)
        # NLL of a standard-normal latent plus the per-pixel log-det term.
        nll = th.mean(z**2) / 2 - th.mean(log_j) / (28 * 28)
        nll.backward()
        th.nn.utils.clip_grad_norm_(model.trainable_parameters, 10.)
        model.optimizer.step()
        model.optimizer.zero_grad()
        if i % 50 == 0:
            print('step {} loss {} lr {}'.format(
                i, nll.item(), model.optimizer.param_groups[0]['lr']))
            writer.add_scalar('loss', nll, i)
            img = x.view([-1, 28, 28, 1])
            writer.add_images('image', img[:16, :, :, :], i, dataformats='NHWC')
            # Sample from the model by running the flow in reverse.
            z = 1.0 * th.randn(reader.batch_size, 28 * 28).cuda()
            with th.no_grad():
                samples, _ = model(z, l, one_hot_l=True, reverse=True)
            # Un-normalize — presumably the dataset std/mean; TODO confirm
            # the 0.305 / 0.128 constants.
            samples = samples * 0.305 + 0.128
            samples = th.clamp(samples, 0, 1)
            samples = samples.view([-1, 28, 28, 1])
            writer.add_images('gen', samples[:16, :, :, :], i, dataformats='NHWC')
        if i % 230 == 0 and i > 0:
            scheduler.step()
    writer.close()
def fit(net, trainloader, validationloader, dataset_name, box_transform, epochs=1000, lower_learning_period=10):
    """Train a detection network with checkpointing and TensorBoard logging.

    Resumes from 'checkpoints/<id>/' when present. Tracks the best (lowest)
    validation loss, halves the learning rate after ``lower_learning_period``
    epochs without improvement, and saves the model and configuration every
    epoch.

    Returns:
        The best (lowest) validation loss observed.
    """
    model_dir_header = net.get_identifier()
    chp_dir = os.path.join('checkpoints', model_dir_header)
    checkpoint_name_path = os.path.join(chp_dir, '{}_checkpoints.pth'.format(dataset_name))
    checkpoint_conf_path = os.path.join(chp_dir, '{}_configuration.json'.format(dataset_name))
    train_config = TrainingConfiguration()
    # Resume: load both the serialized model and the training configuration.
    if os.path.exists(chp_dir):
        net = torch.load(checkpoint_name_path)
        train_config.load(checkpoint_conf_path)
    net.cuda()
    summary(net, (3, 224, 224))
    writer = SummaryWriter(os.path.join('logs', model_dir_header))
    for epoch in range(train_config.epoch, epochs):
        # --- training pass ---
        loss, objectness_loss, size_loss, offset_loss, class_loss, samples = fit_epoch(
            net, trainloader, train_config.learning_rate, box_transform, epoch=epoch)
        writer.add_scalars('Train/Metrics', {'objectness_loss': objectness_loss,
                                             'size_loss': size_loss,
                                             'offset_loss': offset_loss,
                                             'class_loss': class_loss}, epoch)
        writer.add_scalar('Train/Metrics/loss', loss, epoch)
        grid = join_images(samples)
        writer.add_images('train_sample', grid, epoch, dataformats='HWC')
        # --- validation pass ---
        validation_map, loss, objectness_loss, size_loss, offset_loss, class_loss, samples = metrics(
            net, validationloader, box_transform, epoch)
        writer.add_scalars('Validation/Metrics', {'objectness_loss': objectness_loss,
                                                  'size_loss': size_loss,
                                                  'offset_loss': offset_loss,
                                                  'class_loss': class_loss}, epoch)
        writer.add_scalar('Validation/Metrics/loss', loss, epoch)
        writer.add_scalar('Validation/Metrics/validation_map', validation_map, epoch)
        grid = join_images(samples)
        writer.add_images('validation_sample', grid, epoch, dataformats='HWC')
        os.makedirs((chp_dir), exist_ok=True)
        # Keep the best model by LOWEST validation loss; otherwise age the
        # learning-rate schedule.
        if train_config.best_metric > loss:
            train_config.iteration_age = 0
            train_config.best_metric = loss
            print('Epoch {}. \nSaving model with metric: {}'.format(epoch, loss))
            torch.save(net, checkpoint_name_path.replace('.pth', '_final.pth'))
        else:
            train_config.iteration_age += 1
            print('Epoch {} metric: {}'.format(epoch, loss))
        if train_config.iteration_age == lower_learning_period:
            train_config.learning_rate *= 0.5
            train_config.iteration_age = 0
            print("Learning rate lowered to {}".format(train_config.learning_rate))
        train_config.epoch = epoch + 1
        train_config.save(checkpoint_conf_path)
        torch.save(net, checkpoint_name_path)
        torch.save(net.state_dict(), checkpoint_name_path.replace('.pth', '_final_state_dict.pth'))
    print('Finished Training')
    # BUG FIX: the original returned the undefined name `best_map` (NameError
    # at the end of every successful run); return the tracked best metric.
    return train_config.best_metric
def saveImgTest(self, model_name, i_step, inputs_, groundtruth_, prediction_):
    """Log one input/ground-truth/prediction triplet to TensorBoard.

    Takes the first frame (for 5D sequence tensors) and the first element of
    the batch, paints non-ROI ground-truth pixels (-1) gray, concatenates the
    three images side by side and writes them under ``<model_name>/<i_step>``.
    """
    if (self.dim5D):
        # (b, c, t, h, w) -> (t, b, c, h, w) -> get first frame of the sequence of frames
        inputs = inputs_.permute(2, 0, 1, 3, 4)[0]
        groundtruth = groundtruth_.permute(2, 0, 1, 3, 4)[0]
        prediction = prediction_.permute(2, 0, 1, 3, 4)[0]
    else:
        # Already (b, c, h, w): frame-level tensors, used as-is.
        inputs = inputs_
        groundtruth = groundtruth_
        prediction = prediction_
    # get only the first element of the batch; inputs assumed in [0, 255]
    inputs = inputs[0].cpu().numpy() / 255.0
    groundtruth = groundtruth[0].cpu().numpy()
    prediction = prediction[0].cpu().numpy()
    # change -1 (non-ROI marker) to a gray color
    shape = groundtruth.shape
    groundtruth = groundtruth.reshape(-1)
    idx = np.where(groundtruth == -1)[0]  # find non-ROI
    if (len(idx) > 0):
        groundtruth[idx] = 0.55
    groundtruth = groundtruth.reshape(shape)
    inputs = torch.from_numpy(inputs)
    groundtruth = torch.from_numpy(groundtruth)
    prediction = torch.from_numpy(prediction)
    # gray to rgb to concat three images in the future
    groundtruth = groundtruth.repeat(3, 1, 1)
    prediction = prediction.repeat(3, 1, 1)
    # concat three images into one image along the width axis
    images = torch.cat((inputs, groundtruth, prediction), 2)
    # write on tensorboard
    file_writer = SummaryWriter(self.logdir + '/')
    file_writer.add_images(model_name + '/' + str(i_step), images,
                           global_step=None, dataformats='CHW')
    file_writer.close()
class TensorboardLogger(Callback):
    # fastai-style callback that mirrors training state to TensorBoard and
    # periodically exports the Learner.
    learn: Learner                 # the Learner being trained
    run_name: str                  # subdirectory name for this run
    histogram_freq: int = 50       # scalar-logging period, in iterations
    path: str = None               # root log dir; defaults to <learn.path>/runs
    num_epoch: int = 0             # epochs completed so far
    writer: SummaryWriter = None   # created lazily in on_train_begin

    def __post_init__(self):
        self.path = self.path or os.path.join(self.learn.path, "runs")
        self.log_dir = os.path.join(self.path, self.run_name)

    def on_train_begin(self, **kwargs):
        # The writer is created here (not in __post_init__) so no event file
        # is opened before training actually starts.
        self.writer = SummaryWriter(log_dir=self.log_dir)

    def on_epoch_end(self, **kwargs):
        logging.info("Epoch ended !")
        # Export the model every third epoch (0, 3, 6, ...).
        if self.num_epoch % 3 == 0:
            self.learn.export(os.getcwd() + '/data/' + self.run_name +
                              '_EXPORT_{}.pth'.format(self.num_epoch))
        self.num_epoch += 1

    def on_batch_end(self, **kwargs):
        iteration = kwargs["iteration"]
        loss = kwargs["last_loss"]
        # Scalars every `histogram_freq` iterations.
        if iteration % self.histogram_freq == 0:
            self.writer.add_scalar("learning_rate", self.learn.opt.lr, iteration)
            self.writer.add_scalar("momentum", self.learn.opt.mom, iteration)
            self.writer.add_scalar("loss", loss, iteration)
        # Images five times less often.
        if (iteration % (self.histogram_freq * 5)) == 0:
            # First three samples only; the input keeps channel 0, while
            # targets/outputs drop the last two channels — presumably
            # auxiliary/background maps; TODO confirm.
            li = kwargs['last_input'][:3, 0, :, :].unsqueeze(1)
            lt = kwargs['last_target'][:3, :-2, :, :]
            lo = kwargs['last_output'][:3, :-2, :, :]
            self.writer.add_images('images', li, iteration)
            self.writer.add_images('masks/true', lt, iteration)
            self.writer.add_images('masks/pred', lo, iteration)
def publish_image_results(dashboard: SummaryWriter, mode: str, steps: int,
                          autoencoder: SVBRDFAutoencoder,
                          network_svbrdf: SVBRDF, dataset: Dataset,
                          materials: Iterable[int]) -> None:
    '''
    Publishes a series of images to the given TensorBoard depicting reconstructions
    (diverse and otherwise) of the specified materials.

    Args:
        dashboard: TensorBoard to host the published data.
        mode: Mode associated with the published data (i.e., "Training" or "Testing").
        steps: Step count associated with the published data.
        autoencoder: SVBRDFAutoencoder to be used to reconstruct the Dataset images.
        network_svbrdf: SVBRDF intended for the output of the SVBRDF autoencoder.
        dataset: Dataset consisting of inputs to the SVBRDF autoencoder and ground-truth Tensors.
        materials: Indices of the materials in the Dataset to be reconstructed.
    '''
    for material in materials:
        texture = dataset.textures[material]
        # One sample should be enough to hint at the reconstruction performance of the SVBRDF autoencoder.
        dataset_batch, (dataset_normals, dataset_svbrdf) = dataset.sample(material)
        # NOTE: this assigns the decoded parameters onto network_svbrdf in place.
        network_normals, network_svbrdf.parameters = SVBRDFAutoencoder.interpret(
            autoencoder.forward(dataset_batch))
        have_radiance, want_radiance = compute_radiance(
            network_normals=network_normals,
            network_svbrdf=network_svbrdf,
            dataset_normals=dataset_normals,
            dataset_svbrdf=dataset_svbrdf)
        # The sRGB colour space applies a desirable gamma correction.
        input_image = image.convert_RGB_to_sRGB(dataset_batch[0, :3].permute(
            1, 2, 0))
        have_image = image.convert_RGB_to_sRGB(have_radiance[0])
        want_image = image.convert_RGB_to_sRGB(want_radiance[0])
        # Order: network input, ground-truth rendering, reconstruction.
        reconstruction_images = [input_image, want_image, have_image]
        # By convention, the shader module in this repository outputs radiance in [B, R, C, 3] order.
        dashboard.add_images(tag=f'{mode} / {texture}', global_step=steps,
                             dataformats='NHWC',
                             img_tensor=torch.stack(reconstruction_images, dim=0))
def train(opt):
    """Run neural style transfer.

    Optimizes a randomly-initialized image so that it matches the content of
    ``opt.content`` and the style of ``opt.style``, logging losses and
    intermediate images to TensorBoard and writing PNGs under ``opt.result``.

    Args:
        opt: Namespace-like options with attributes ``content``, ``style``,
             ``result``, ``epoch`` and ``display_epoch``.
    """
    # Load the fixed content/style targets; the generated image is the only
    # trainable tensor.
    content_image = image_load(opt.content)
    style_image = image_load(opt.style)
    generate_image = torch.randn_like(content_image).requires_grad_(True)

    # Set Optimizer — the image itself is the sole parameter.
    optim = torch.optim.Adam([generate_image], lr=0.01)

    # Set Loss: weighted combination of content (alpha) and style (beta) terms.
    loss = Loss(alpha=1, beta=1000)

    writer = SummaryWriter()
    if not osp.isdir(opt.result):
        os.makedirs(opt.result)

    for epoch in range(opt.epoch):
        optim.zero_grad()
        total_loss, c_loss, s_loss = loss(generate_image, content_image,
                                          style_image)
        total_loss.backward()
        optim.step()

        writer.add_scalar('loss/total', total_loss, epoch)
        writer.add_scalar('loss/content', c_loss, epoch)
        writer.add_scalar('loss/style', s_loss, epoch)

        if (epoch + 1) % opt.display_epoch == 0:
            writer.add_images('image', generate_image, epoch,
                              dataformats="NCHW")
            print('[Epoch {}] Total : {:.2} | C_loss : {:.2} | S_loss : {:.2}'.
                  format(epoch + 1, total_loss, c_loss, s_loss))
            imsave(generate_image,
                   osp.join(opt.result, '{}.png'.format(epoch + 1)))

    imsave(content_image, 'content.png')
    imsave(style_image, 'style.png')
    # Fix: flush pending events and release the event-file handle; the
    # original never closed the writer.
    writer.close()
class SegmentationVisualizer(InterfaceTrainingVisualizer):
    """TensorBoard-backed visualizer for segmentation training runs."""

    def __init__(self, logdir: str = None) -> None:
        self.writer = SummaryWriter(logdir)

    def add_scalars(self, scalars: dict, step: int, prefix: str = '') -> None:
        # One scalar series per entry, namespaced by the optional prefix.
        for name, value in scalars.items():
            self.writer.add_scalar(prefix + name, value, step)

    def _log_grid(self, tag: str, batch, step: int) -> None:
        # Render a batch as a single image grid; add_images wants a batch
        # dimension, so promote a 3-D grid to a singleton batch.
        grid = torchvision.utils.make_grid(batch)
        if len(grid.size()) == 3:
            grid = grid.unsqueeze(0)
        self.writer.add_images(tag, grid, step)

    def add_images(self, images: dict, step: int, prefix: str = '') -> None:
        for name, batch in images.items():
            assert isinstance(batch, torch.Tensor)
            self._log_grid(prefix + name, batch, step)
        # When both masks and raw images are supplied, also log their
        # element-wise composition (mask applied to image).
        if 'gt_masks' in images and 'images' in images:
            self._log_grid(prefix + 'gt_composition',
                           images['gt_masks'] * images['images'], step)
        if 'masks' in images and 'images' in images:
            self._log_grid(prefix + 'composition',
                           images['masks'] * images['images'], step)

    def close(self):
        self.writer.close()
def model_test(name):
    """Feed MNIST test batches through CutNet (conv) and PoolNet (pooling),
    logging inputs and outputs to TensorBoard, then run a small hand-built
    pooling/convolution sanity check.

    Args:
        name: Unused; kept for backward compatibility with existing callers.
    """
    transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize(56),
        torchvision.transforms.ToTensor()
    ])
    dataset = torchvision.datasets.MNIST("data", train=False,
                                         transform=transforms,
                                         download=False)
    dataloader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)
    summer = SummaryWriter("logs")

    # Fix: build each network once. The original re-instantiated CutNet and
    # PoolNet inside the loop, re-randomizing any weights on every batch.
    net = CutNet()
    mp_net = PoolNet()

    for idx, (imgs, labels) in enumerate(dataloader):
        print(f"shape of images: {imgs.shape}")
        summer.add_images("inputs", imgs, idx)
        summer.add_images("conv_outputs", net(imgs), idx)
        summer.add_images("max_outputs", mp_net(imgs), idx)

    # Sanity check on a tiny hand-crafted tensor (reuses the same PoolNet;
    # presumably it is a stateless pooling layer — TODO confirm).
    x = torch.tensor([1., 2, 3, 4, 4, 3, 2, 1])
    x = x.reshape(-1, 1, 2, 4)
    print(x)
    print("mpooled result: ", mp_net(x))

    kernel = torch.tensor([1, 2, 2., 1]).reshape(1, 1, 2, 2)
    print(f"kernel: {kernel}")
    y = torch.nn.functional.conv2d(x, kernel, stride=2)
    print(f"Conv2d Result: {y}")
    summer.close()
class SummaryHelper(object):
    """Rank-aware wrapper around SummaryWriter.

    Only the process with distributed rank 0 owns a real writer; every other
    rank keeps ``w = None`` and all logging methods silently no-op, so callers
    can log unconditionally in distributed training.
    """

    def __init__(self, distributed_rank=0, log_dir=None):
        self.w = SummaryWriter(log_dir=log_dir) if distributed_rank == 0 else None
        # Global step applied to subsequent add_scalar/add_images calls.
        self.step = 0

    def add_scalar(self, key, value):
        if self.w is not None:
            self.w.add_scalar(key, value, self.step)

    def add_text(self, key, value):
        if self.w is not None:
            self.w.add_text(key, value)

    def add_images(self, key, value):
        if self.w is not None:
            self.w.add_images(key, value, self.step)

    def set_step(self, step):
        self.step = step
class TensorboardLogger(Logger):
    """Logger backend that records loss dictionaries and image batches to
    TensorBoard, tagged by the base logger's mode/scale/step state.

    NOTE(review): this duplicates a class name used elsewhere in the project;
    the two appear to come from different modules.
    """

    def __init__(self, run_name) -> None:
        super(TensorboardLogger, self).__init__(run_name)
        from torch.utils.tensorboard import SummaryWriter
        self.writer = SummaryWriter(
            log_dir=os.path.join(self.log_dir, self.run_name))

    def log_losses(self, loss_dict: Dict[str, torch.Tensor]) -> None:
        # Convert tensor losses to plain Python floats *in place* (callers see
        # the converted dict) before emitting them as a scalar group.
        for loss_name, loss_value in loss_dict.items():
            if torch.is_tensor(loss_value):
                loss_dict[loss_name] = loss_value.detach().cpu().numpy().item()
        self.writer.add_scalars(f'{self.mode}/{self.scale}', loss_dict, self.step)

    def log_images(self, img_batch: torch.Tensor, name: str,
                   dataformats: str = 'NCHW') -> None:
        if torch.is_tensor(img_batch):
            img_batch = img_batch.detach().cpu().numpy()
        # Division by 255 implies pixel values are expected in [0, 255] —
        # TODO confirm against the producers of these batches.
        self.writer.add_images(f'{self.mode}/{self.scale}/{name}',
                               img_batch / 255, self.step,
                               dataformats=dataformats)