# One training epoch: step the LR scheduler, run all batches, log to
# console + TensorBoard, and checkpoint every 10 epochs.
# NOTE(review): stepping the scheduler BEFORE the epoch's optimizer steps is
# the pre-PyTorch-1.1 convention; on >= 1.1 call it after — confirm version.
lr_scheduler.step()

for i_batch, sampled_batch in enumerate(loader):
    data, target = sampled_batch
    # Variable is a deprecated no-op wrapper on modern PyTorch; kept for
    # compatibility with the rest of this (legacy-style) script.
    if torch.cuda.is_available():
        data, target = Variable(data).cuda(), Variable(target).cuda()
    else:
        data, target = Variable(data), Variable(target)

    optimizer.zero_grad()
    pred = net(data)
    loss = loss_fn(pred, target.float())
    loss.backward()
    optimizer.step()

    # loss.item() instead of loss.data[0]: on PyTorch >= 0.5 losses are
    # 0-dim tensors and indexing them raises IndexError. Also matches the
    # logging style used elsewhere in this file.
    logger.info('[epoch: {}, batch: {}] Training loss: {}'.format(
        epoch, i_batch, loss.item()))
    tb_logger.scalar_summary('loss', loss.item(),
                             epoch * niter_per_epoch + i_batch + 1)

    # (2) Log values and gradients of the parameters (histogram)
    # NOTE(review): logged every batch but keyed on epoch + 1, so each batch
    # overwrites the previous one within an epoch — likely meant to run once
    # per epoch; confirm intent before moving it.
    for tag, value in net.named_parameters():
        tag = tag.replace('.', '/')
        tb_logger.histo_summary(tag, value.data.cpu().numpy(), epoch + 1)
        tb_logger.histo_summary(tag + '/grad',
                                value.grad.data.cpu().numpy(), epoch + 1)

# Checkpoint once every 10 epochs (placed at epoch level: the filename is
# keyed on `epoch`, not on the batch index).
if (epoch + 1) % 10 == 0:
    cp_path = opj(CHECKPOINTS_PATH, cur_time, 'model_%s' % epoch)
    mkdir_r(dirname(cp_path))
    torch.save(net.state_dict(), cp_path)
# Compute accuracy: predicted class = argmax over the class dimension.
_, argmax = torch.max(outputs, 1)
accuracy = (labels == argmax.squeeze()).float().mean()

# Every 100 steps: print progress and push summaries to TensorBoard.
if (step + 1) % 100 == 0:
    print('Step [{}/{}], Loss: {:.4f}, Acc: {:.2f}'.format(
        step + 1, total_step, loss.item(), accuracy.item()))

    # ================================================================== #
    #                        Tensorboard Logging                         #
    # ================================================================== #

    # 1. Log scalar values (scalar summary)
    info = {'loss': loss.item(), 'accuracy': accuracy.item()}
    for tag, value in info.items():
        logger.scalar_summary(tag, value, step + 1)

    # 2. Log values and gradients of the parameters (histogram summary)
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        logger.histo_summary(tag, value.data.cpu().numpy(), step + 1)
        logger.histo_summary(tag + '/grad',
                             value.grad.data.cpu().numpy(), step + 1)

    # 3. Log training images (image summary): the first 10 images of the
    # batch, reshaped to 28x28 (assumes MNIST-style inputs — TODO confirm).
    info = {'images': images.view(-1, 28, 28)[:10].cpu().numpy()}
    # Loop variable renamed from `images` to avoid clobbering the outer
    # `images` tensor with a numpy array (shadowing bug in the original).
    for tag, image_batch in info.items():
        logger.image_summary(tag, image_batch, step + 1)