def training(self, epoch):
    """Run one training epoch; checkpoint every epoch when validation is disabled."""
    self.model.train()
    running_loss = 0.0
    progress = tqdm(self.trainloader)
    for step, (image, target) in enumerate(progress):
        # Per-iteration LR schedule, then the usual forward/backward/step cycle.
        self.scheduler(self.optimizer, step, epoch, self.best_pred)
        self.optimizer.zero_grad()
        if torch_ver == "0.3":
            # Legacy PyTorch 0.3 needed explicit Variable wrapping.
            image, target = Variable(image), Variable(target)
        loss = self.criterion(self.model(image), target)
        loss.backward()
        self.optimizer.step()
        running_loss += loss.item()
        progress.set_description('Train loss: %.3f' % (running_loss / (step + 1)))
    if self.args.no_val:
        # save checkpoint every epoch
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': self.model.module.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, self.args, False)
def validation(epoch):
    """Single-crop validation pass over `valloader`; updates the module-level
    best_pred and saves a checkpoint (rank-0 path)."""
    global best_pred
    is_best = False
    model.eval()
    metric.reset()
    for idx, (image, target) in enumerate(valloader):
        with torch.no_grad():
            #correct, labeled, inter, union = eval_batch(model, image, target)
            pred = model(image)[0]
            target = target.cuda(args.gpu)
            metric.update(target, pred)
            pixAcc, mIoU = metric.get()
            if idx % 100 == 0 and args.gpu == 0:
                print('pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
    if args.gpu == 0:
        pixAcc, mIoU = torch_dist_avg(args.gpu, pixAcc, mIoU)
        print('pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
        new_pred = (pixAcc + mIoU) / 2
        if new_pred > best_pred:
            is_best = True
            best_pred = new_pred
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_pred': best_pred,
            }, args, is_best)
def training(self, epoch):
    """One training epoch; snapshots weights every 10th epoch and on each of
    the final 10 epochs (only when validation is enabled)."""
    self.model.train()
    epoch_loss = 0.0
    progress = tqdm(self.trainloader)
    for step, (image, target) in enumerate(progress):
        self.scheduler(self.optimizer, step, epoch, self.best_pred)
        self.optimizer.zero_grad()
        loss = self.criterion(self.model(image), target)
        loss.backward()
        self.optimizer.step()
        epoch_loss += loss.item()
        progress.set_description('Train loss: %.3f' % (epoch_loss / (step + 1)))
    self.logger.info('Train loss: %.3f' % (epoch_loss / (step + 1)))
    if not self.args.no_val:
        # Both original branches issued the identical save call; one condition.
        if epoch > self.args.epochs - 10 or epoch % 10 == 0:
            utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, self.args, False, "checkpoint_%s.pth.tar" % (epoch + 1))
def training(self, epoch): train_loss = 0.0 ################################################ self.model.train() self.model.module.weather_classifier.eval() self.model.module.time_classifier.eval() # self.model.eval() # self.model.module.weather_classifier.train() # self.model.module.time_classifier.train() ################################################ tbar = tqdm(self.trainloader) for i, (image, target, weather, timeofday, scene) in enumerate(tbar): weather = weather.cuda() timeofday = timeofday.cuda() ################################################ # self.scheduler(self.optimizer, i, epoch, self.best_pred) ################################################ self.optimizer.zero_grad() if torch_ver == "0.3": image = Variable(image) target = Variable(target) outputs, weather_o, timeofday_o = self.model(image) # create weather / timeofday target mask ####################### b, _, h, w = weather_o.size() weather_t = torch.ones((b, h, w)).long().cuda() for bi in range(b): weather_t[bi] *= weather[bi] timeofday_t = torch.ones((b, h, w)).long().cuda() for bi in range(b): timeofday_t[bi] *= timeofday[bi] ################################################################ loss = self.criterion(outputs, target) # loss = self.criterion(weather_o, weather_t) + self.criterion(timeofday_o, timeofday_t) loss.backward() self.optimizer.step() train_loss += loss.item() tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1))) self.logger.info('Train loss: %.3f' % (train_loss / (i + 1))) # save checkpoint every 5 epoch is_best = False if epoch % 5 == 0: # filename = "checkpoint_%s.pth.tar"%(epoch+1) filename = "checkpoint_%s.%s.%s.%s.pth.tar" % ( self.args.log_root, self.args.checkname, self.args.model, epoch + 1) utils.save_checkpoint( { 'epoch': epoch + 1, 'state_dict': self.model.module.state_dict(), 'optimizer': self.optimizer.state_dict(), 'best_pred': self.best_pred, }, self.args, is_best, filename)
def test(args):
    """Build the test dataset/model, load a checkpoint, and re-save a reduced copy.

    Bug fix: the original called ``utils.save_checkpoint(..., self.args, is_best, ...)``
    inside this plain function — ``self`` and ``is_best`` are undefined here and
    would raise NameError. Use ``args`` and an explicit ``False``.
    """
    # output folder
    outdir = 'outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])])
    # dataset: 'testval' evaluates on the val split; 'test' has no labels
    if args.eval:
        testset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='testval', transform=input_transform)
    else:
        testset = get_segmentation_dataset(args.dataset, split='test',
                                           mode='test', transform=input_transform)
    # dataloader (kept even though unused below — presumably consumed by a
    # caller-side evaluation loop in the full file; TODO confirm)
    loader_kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    test_data = data.DataLoader(testset, batch_size=args.test_batch_size,
                                drop_last=False, shuffle=False,
                                collate_fn=test_batchify_fn, **loader_kwargs)
    # model
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, pretrained=True)
    else:
        model = get_segmentation_model(args.model, dataset=args.dataset,
                                       backbone=args.backbone, aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm2d,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
    # resuming checkpoint
    if args.resume is None or not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    # strict=False, so that it is compatible with old pytorch saved models
    model.load_state_dict(checkpoint['state_dict'])
    # Re-save just the weights under a new name (not flagged as best).
    utils.save_checkpoint({
        'state_dict': checkpoint['state_dict'],
    }, args, False, 'DANet101_reduce.pth.tar')
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))
def save_ckpt(self, epoch, score):
    """Persist a checkpoint, marking it best when `score` ties or beats best_pred."""
    is_best = score >= self.best_pred
    if is_best:
        self.best_pred = score
    state = {
        'epoch': epoch + 1,
        'state_dict': self.single_device_model.state_dict(),
        'optimizer': self.optimizer.state_dict(),
        'best_pred': self.best_pred,
    }
    utils.save_checkpoint(state, self.args, is_best)
def validation(self, epoch):
    """Fast validation during training: accumulates pixAcc/mIoU over the val
    set and checkpoints when (pixAcc + mIoU)/2 improves on best_pred.

    Bug fix: ``preds = tuple(outputs[0], )`` called ``tuple()`` ON the tensor,
    which iterates its first axis into a tuple of slices — not the intended
    one-element tuple ``(outputs[0],)``. The redundant ``outputs = tuple(preds)``
    is folded away as well.
    """
    def eval_batch(model, image, target):
        outputs = model(image)
        # Keep only the main head's prediction as a 1-tuple for gather.
        preds = (outputs[0],)
        outputs = gather(preds, 0, dim=0)
        pred = outputs[0].unsqueeze(0)
        target = target.cuda().unsqueeze(0)
        correct, labeled = utils.batch_pix_accuracy(pred.data, target)
        inter, union = utils.batch_intersection_union(
            pred.data, target, self.nclass)
        return correct, labeled, inter, union

    is_best = False
    self.model.eval()
    total_inter, total_union, total_correct, total_label = 0, 0, 0, 0
    tbar = tqdm(self.valloader, desc='\r')
    for i, (image, target) in enumerate(tbar):
        if torch_ver == "0.3":
            image = Variable(image, volatile=True)
            correct, labeled, inter, union = eval_batch(
                self.model, image, target)
        else:
            with torch.no_grad():
                correct, labeled, inter, union = eval_batch(
                    self.model, image, target)
        total_correct += correct
        total_label += labeled
        total_inter += inter
        total_union += union
        # np.spacing(1) guards against division by zero.
        pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
        IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
        mIoU = IoU.mean()
        tbar.set_description('pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
    self.logger.info('pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
    new_pred = (pixAcc + mIoU) / 2
    if new_pred > self.best_pred:
        is_best = True
        self.best_pred = new_pred
    utils.save_checkpoint(
        {
            'epoch': epoch + 1,
            'state_dict': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_pred': self.best_pred,
        }, self.args, is_best)
def training(self, epoch):
    """One knowledge-distillation training epoch: segmentation loss against the
    ground truth plus a KD loss against the (frozen) teacher's main prediction.
    The two losses are backpropagated separately (hence retain_graph=True on
    the first), then a single optimizer step is taken.

    Cleanup: removed the dead ``loss_seg = 0`` (immediately overwritten) and
    the unused ``loss = loss_seg + loss_kd`` whose backward was commented out.
    """
    train_loss = 0.0
    teacher_loss = 0.0
    self.model.train()
    tbar = tqdm(self.trainloader)
    for i, (image, target) in enumerate(tbar):
        self.scheduler(self.optimizer, i, epoch, self.best_pred)
        self.optimizer.zero_grad()
        if torch_ver == "0.3":
            image = Variable(image)
            target = Variable(target)
        outputs = self.model(image)
        with torch.no_grad():
            teacher_outputs = self.teacher_model(image)
        # Collect only the teacher's main prediction (pred1) from each device's
        # output; se_pred/pred2 are unused for distillation.
        teacher_targets = []
        for teacher_output in teacher_outputs:
            pred1, se_pred, pred2 = tuple(teacher_output)
            teacher_targets.append(pred1)
        teacher_target = torch.cat(tuple(teacher_targets), 0).detach()
        loss_seg = self.criterion(outputs, target)
        # retain_graph=True: the KD backward below reuses the same graph.
        loss_seg.backward(retain_graph=True)
        train_loss += loss_seg.item()
        loss_kd = self.criterion_kd(outputs, teacher_target)
        loss_kd.backward()
        teacher_loss += loss_kd.item()
        self.optimizer.step()
        tbar.set_description('Train loss: %.3f, Teacher loss: %.3f' %
                             (train_loss / (i + 1), teacher_loss / (i + 1)))
    if self.args.no_val:
        # save checkpoint every epoch
        is_best = False
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': self.model.module.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, self.args, is_best)
def training(self, epoch):
    """One training epoch with windowed (every-20-batches) loss logging;
    checkpoints every 20th epoch and on the final epoch.

    Bug fixes: the final-epoch test referenced the global ``args`` (undefined
    in this method scope — NameError) instead of ``self.args``; and the
    ``utils.save_checkpoint`` call omitted the ``is_best`` argument, so the
    filename was consumed as ``is_best`` (every other call site in this file
    passes ``(state, args, is_best, filename)``).
    """
    self.model.train()
    tbar = tqdm(self.trainloader)
    # tbar = self.trainloader
    train_loss = 0.       # rolling window accumulator (reset every 20 batches)
    train_loss_all = 0.   # full-epoch accumulator
    for i, (image, target) in enumerate(tbar):
        self.scheduler(self.optimizer, i, epoch)
        self.optimizer.zero_grad()
        if torch_ver == "0.3":
            image = Variable(image)
            target = Variable(target)
        outputs = self.model(image.float())
        loss = self.criterion(outputs, target)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
        train_loss_all += loss.item()
        if i == 0 or (i + 1) % 20 == 0:
            train_loss = train_loss / min(20, i + 1)
            self.logger.info('Epoch [%d], Batch [%d],\t train-loss: %.4f' %
                             (epoch + 1, i + 1, train_loss))
            train_loss = 0.
    self.logger.info('-> Epoch [%d], Train epoch loss: %.3f' %
                     (epoch + 1, train_loss_all / (i + 1)))
    if not self.args.no_val:
        # save checkpoint every 20 epoch
        filename = "checkpoint_%s.pth.tar" % (epoch + 1)
        if epoch % 19 == 0 or epoch == self.args.epochs - 1:
            utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                }, self.args, False, filename)
def training(epoch):
    """One distributed training epoch (module-level globals); appends the
    epoch loss, checkpoints when the loss improved on the previous epoch,
    and refreshes the loss plots."""
    train_sampler.set_epoch(epoch)
    global best_pred
    model.train()
    epoch_loss = 0.0
    tic = time.time()
    for step, (image, target) in enumerate(trainloader):
        scheduler(optimizer, step, epoch, best_pred)
        optimizer.zero_grad()
        outputs = model(image)
        target = target.cuda(args.gpu)
        loss = criterion(*outputs, target)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        if step % 100 == 0 and args.gpu == 0:
            elapsed = time.time() - tic
            # First report covers a single iteration, later ones cover 100.
            iter_per_sec = (100.0 / elapsed) if step != 0 else (1.0 / elapsed)
            tic = time.time()
            print('Epoch: {}, Iter: {}, Speed: {:.3f} iter/sec, Train loss: {:.3f}'.format(
                epoch, step, iter_per_sec, epoch_loss / (step + 1)))
    train_losses.append(epoch_loss / len(trainloader))
    if epoch > 1 and train_losses[epoch] < train_losses[epoch - 1]:
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_pred': new_preds[(epoch - 1) // 10],
            }, args, False, filename='checkpoint_train.pth.tar')
    plt.plot(train_losses)
    plt.xlabel('Epoch')
    plt.ylabel('Train_loss')
    plt.title('Train_Loss')
    plt.grid()
    plt.savefig('./loss_fig/train_losses.pdf')
    plt.savefig('./loss_fig/train_losses.svg')
    plt.close()
def validation(epoch):
    """Single-crop validation with distributed metric summation; updates the
    module-level best_pred and checkpoints (skipped entirely under --eval)."""
    global best_pred
    model.eval()
    metric.reset()
    for step, (image, target) in enumerate(valloader):
        with torch.no_grad():
            prediction = model(image)[0]
            gt = target.cuda(args.gpu)
            metric.update(gt, prediction)
        if step % 100 == 0:
            counters = utils.torch_dist_sum(args.gpu, *metric.get_all())
            pixAcc, mIoU = utils.get_pixacc_miou(*counters)
            if args.gpu == 0:
                print('pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
    counters = utils.torch_dist_sum(args.gpu, *metric.get_all())
    pixAcc, mIoU = utils.get_pixacc_miou(*counters)
    if args.gpu == 0:
        print('pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
    if args.eval:
        return
    new_pred = (pixAcc + mIoU) / 2
    checkpoint_best = new_pred > best_pred
    if checkpoint_best:
        best_pred = new_pred
    utils.save_checkpoint(
        {
            'epoch': epoch + 1,
            'state_dict': model.module.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_pred': best_pred,
        }, args, checkpoint_best)
def training(self, epoch):
    """One training epoch with optional TensorBoard scalar logging every 100
    iterations; checkpoints each epoch when validation is disabled.

    Bug fix: the TensorBoard guard was inverted (``if not self.args.tblogger``),
    which logged exactly when the logger flag was OFF — and would fail if
    ``self.tblogger`` was never constructed in that configuration.
    """
    train_loss = 0.0
    self.model.train()
    tbar = tqdm(self.trainloader)
    for i, (image, target) in enumerate(tbar):
        self.scheduler(self.optimizer, i, epoch, self.best_pred)
        self.optimizer.zero_grad()
        if torch_ver == "0.3":
            image = Variable(image)
            target = Variable(target)
        outputs = self.model(image)
        loss = self.criterion(outputs, target)
        # with amp.scale_loss(loss, self.optimizer) as scaled_loss:
        #     scaled_loss.backward()
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
        tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
        if self.args.tblogger and i % 100 == 0:
            self.tblogger.add_scalar('Train loss', (train_loss / (i + 1)), i + 1)
    if self.args.no_val:
        # save checkpoint every epoch
        is_best = False
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': self.model.module.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, self.args, is_best,
            filename='checkpoint_{}.pth.tar'.format(epoch))
def validation(self, epoch):
    """Distributed validation: per-batch pixAcc/mIoU counters are all-reduced
    across ranks, accumulated locally, and rank 0 checkpoints on improvement.

    NOTE(review): the model returns (loss, outputs) when called with a target
    — the loss is discarded here.
    """
    # Fast test during the training
    def eval_batch(model, image, target):
        #image = image.cuda(non_blocking=True)
        #target = target.cuda(non_blocking=True)
        image = image.to(self.device)
        target = target.to(self.device)
        _, outputs = model(image, target)
        #outputs = model(image)
        #correct, labeled = utils.batch_pix_accuracy(pred.data, target)
        #inter, union = utils.batch_intersection_union(pred.data, target, self.nclass)
        correct, labeled = utils.batch_pix_accuracy(outputs, target)
        inter, union = utils.batch_intersection_union(
            outputs, target, self.nclass)
        return correct, labeled, inter, union

    world_size = self.args.world_size
    is_best = False
    self.model.eval()
    #total_inter = AverageMeter()
    #total_union = AverageMeter()
    # inter/union are per-class tensors, so they are summed directly;
    # correct/label are scalars tracked through AverageMeter (its .sum is used).
    total_inter = 0
    total_union = 0
    total_correct = AverageMeter()
    total_label = AverageMeter()
    # Only rank 0 renders the progress bar.
    tbar = tqdm(self.valloader, desc='\r', disable=self.args.rank not in [0])
    for i, (image, target) in enumerate(tbar):
        #target = target.cuda()
        #image_var = torch.autograd.Variable(image.cuda(), volatile=True)
        #target = torch.autograd.Variable(target, volatile=True)
        with torch.no_grad():
            correct, labeled, inter, union = eval_batch(
                self.model, image, target)
        #reduced_correct = correct.clone() / world_size
        #reduced_label = label.clone() / world_size
        #reduced_inter = inter.clone() / world_size
        #reduced_union = union.clone() / world_size
        # all_reduce requires CUDA tensors.
        inter = inter.cuda()
        union = union.cuda()
        #reduced_correct = correct.data.clone() / world_size
        #reduced_label = labeled.data.clone() / world_size
        # Counters are SUMMED (not averaged) across ranks, so no /world_size.
        reduced_correct = correct.data.clone()
        reduced_label = labeled.data.clone()
        reduced_inter = inter.data.clone()
        reduced_union = union.data.clone()
        dist.all_reduce_multigpu([reduced_correct])
        dist.all_reduce_multigpu([reduced_label])
        dist.all_reduce_multigpu([reduced_inter])
        dist.all_reduce_multigpu([reduced_union])
        total_correct.update(reduced_correct.item(), 1)
        total_label.update(reduced_label.item(), 1)
        #total_inter.update(reduced_inter.item(), image.size(0))
        #total_union.update(reduced_union.item(), image.size(0))
        #total_correct += correct
        #total_label += labeled
        #total_inter += inter
        #total_union += union
        total_inter += reduced_inter
        total_union += reduced_union
        # np.spacing(1) guards against division by zero.
        pixAcc = 1.0 * total_correct.sum / (np.spacing(1) + total_label.sum)
        IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
        mIoU = IoU.mean()
        if self.args.rank == 0:
            tbar.set_description('pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))
    new_pred = (pixAcc + mIoU) / 2
    self.track_pixAcc = pixAcc
    self.track_mIoU = mIoU
    if new_pred > self.best_pred:
        is_best = True
        self.best_pred = new_pred
    if self.args.rank == 0:
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                #'state_dict': self.model.module.state_dict(),
                'state_dict': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, self.args, is_best)
def training(self, epoch):
    """Distributed training epoch.

    The model computes its own loss internally (``model(image, target)``);
    the loss is all-reduced across ranks purely for logging. The many
    commented lines record the debugging history of this routine and are
    kept on purpose.
    """
    #print('Training: 0')
    # Reshuffle the sampler's shard assignment each epoch.
    self.train_sampler.set_epoch(epoch)
    #train_loss = 0.0
    world_size = self.args.world_size
    losses = AverageMeter()
    self.model.train()
    #print('Training: 1')
    # Only rank 0 renders the progress bar.
    tbar = tqdm(self.trainloader, disable=self.args.rank not in [0])
    #tbar = self.trainloader
    #if args.local_rank == 0:
    #    print('Training: 2')
    for i, (image, target) in enumerate(tbar):
        #if args.local_rank == 0:
        #    print('Training: 3')
        #self.optimizer.zero_grad()
        self.scheduler(self.optimizer, i, epoch, self.best_pred)
        #if args.local_rank == 0:
        #    print('Training: 4')
        #if torch_ver == "0.3":
        #    image = Variable(image)
        #    target = Variable(target)
        #target = target.cuda(non_blocking=True)
        #image = torch.autograd.Variable(image.cuda(non_blocking=True))
        #target = torch.autograd.Variable(target)
        image = image.to(self.device)
        target = target.to(self.device)
        loss_out, _ = self.model(image, target)
        #loss = self.criterion(outputs, target)
        loss = loss_out.mean()
        # Average the loss over ranks for monitoring only; gradients are
        # synchronized by the distributed wrapper, not here.
        reduced_loss = loss.data.clone()
        reduced_loss = reduced_loss / world_size
        #reduced_loss = loss
        dist.all_reduce_multigpu([reduced_loss])
        #print('rank = %.3f(%.3f) ---> Loss = %.3f.'
        #      % (self.args.local_rank, self.args.rank, loss))
        #losses.update(reduced_loss.item(), image.size(0))
        losses.update(reduced_loss.item(), 1)
        # zero_grad on the module rather than the optimizer (equivalent here).
        self.model.zero_grad()
        loss.backward()
        #average_gradients(self.model)
        self.optimizer.step()
        # Keep ranks in lockstep each iteration.
        dist.barrier()
        #train_loss += loss.item()
        self.track_loss = losses.avg
        if self.args.rank == 0:
            #tbar.set_description(...)  # verbose debug variant with tensor min/max stats
            tbar.set_description('Train loss: %.3f' % (losses.avg))
    if self.args.no_val and self.args.rank == 0:
        # save checkpoint every epoch
        is_best = False
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                #'state_dict': self.model.module.state_dict(),
                'state_dict': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, self.args, is_best)
def validation(self, epoch):
    """Validation for a model with auxiliary weather / time-of-day heads:
    tracks segmentation pixAcc/mIoU plus per-head pixel accuracy, and
    checkpoints when (pixAcc + mIoU)/2 improves on best_pred."""
    # Fast test during the training
    def eval_batch(model, image, target, weather, timeofday, scene):
        weather = weather.cuda()
        timeofday = timeofday.cuda()
        outputs, weather_o, timeofday_o = model(image)
        # Gathers tensors from different GPUs on a specified device
        # outputs = gather(outputs, 0, dim=0)
        pred = outputs[0]
        target = target.cuda()
        # Broadcast each per-image scalar label to a full (h, w) target mask.
        b, _, h, w = weather_o.size()
        weather_t = torch.ones((b, h, w)).long().cuda()
        for bi in range(b):
            weather_t[bi] *= weather[bi]
        timeofday_t = torch.ones((b, h, w)).long().cuda()
        for bi in range(b):
            timeofday_t[bi] *= timeofday[bi]
        correct, labeled = utils.batch_pix_accuracy(pred.data, target)
        inter, union = utils.batch_intersection_union(
            pred.data, target, self.nclass)
        correct_weather, labeled_weather = utils.batch_pix_accuracy(
            weather_o.data, weather_t)
        correct_timeofday, labeled_timeofday = utils.batch_pix_accuracy(
            timeofday_o.data, timeofday_t)
        return correct, labeled, inter, union, correct_weather, labeled_weather, correct_timeofday, labeled_timeofday

    is_best = False
    self.model.eval()
    total_inter, total_union, total_correct, total_label = 0, 0, 0, 0
    total_correct_weather = 0
    total_label_weather = 0
    total_correct_timeofday = 0
    total_label_timeofday = 0
    tbar = tqdm(self.valloader, desc='\r')
    for i, (image, target, weather, timeofday, scene) in enumerate(tbar):
        if torch_ver == "0.3":
            # Legacy PyTorch 0.3 path: volatile Variables instead of no_grad.
            image = Variable(image, volatile=True)
            correct, labeled, inter, union, correct_weather, labeled_weather, correct_timeofday, labeled_timeofday = eval_batch(
                self.model, image, target, weather, timeofday, scene)
        else:
            with torch.no_grad():
                correct, labeled, inter, union, correct_weather, labeled_weather, correct_timeofday, labeled_timeofday = eval_batch(
                    self.model, image, target, weather, timeofday, scene)
        total_correct += correct
        total_label += labeled
        total_inter += inter
        total_union += union
        # np.spacing(1) guards against division by zero.
        pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
        IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
        mIoU = IoU.mean()
        total_correct_weather += correct_weather
        total_label_weather += labeled_weather
        pixAcc_weather = 1.0 * total_correct_weather / (
            np.spacing(1) + total_label_weather)
        total_correct_timeofday += correct_timeofday
        total_label_timeofday += labeled_timeofday
        pixAcc_timeofday = 1.0 * total_correct_timeofday / (
            np.spacing(1) + total_label_timeofday)
        tbar.set_description(
            'pixAcc: %.3f, mIoU: %.3f, pixAcc_weather: %.3f, pixAcc_timeofday: %.3f'
            % (pixAcc, mIoU, pixAcc_weather, pixAcc_timeofday))
    self.logger.info(
        'pixAcc: %.3f, mIoU: %.3f, pixAcc_weather: %.3f, pixAcc_timeofday: %.3f'
        % (pixAcc, mIoU, pixAcc_weather, pixAcc_timeofday))
    new_pred = (pixAcc + mIoU) / 2
    if new_pred > self.best_pred:
        is_best = True
        self.best_pred = new_pred
    utils.save_checkpoint(
        {
            'epoch': epoch + 1,
            'state_dict': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_pred': self.best_pred,
        }, self.args, is_best)
def validation(epoch):
    """Single-crop validation with distributed metric summation; refreshes the
    metric/prediction plots and checkpoints against the global best_pred."""
    global best_pred
    model.eval()
    metric.reset()
    for step, (image, target) in enumerate(valloader):
        with torch.no_grad():
            prediction = model(image)[0]
            gt = target.cuda(args.gpu)
            metric.update(gt, prediction)
        if step % 100 == 0:
            partial = utils.torch_dist_sum(args.gpu, *metric.get_all())
            pixAcc, mIoU = utils.get_pixacc_miou(*partial)
            if args.gpu == 0:
                print('pixAcc: %.3f, mIoU1: %.3f' % (pixAcc, mIoU))
    final = utils.torch_dist_sum(args.gpu, *metric.get_all())
    pixAcc, mIoU = utils.get_pixacc_miou(*final)
    if args.gpu == 0:
        print('pixAcc: %.3f, mIoU2: %.3f' % (pixAcc, mIoU))
        p_m.append((pixAcc, mIoU))
        plt.plot(p_m)
        plt.xlabel('10 Epoch')
        plt.ylabel('pixAcc, mIoU')
        plt.title('pixAcc, mIoU')
        plt.grid()
        plt.legend(('pixAcc', 'mIoU'))
        plt.savefig('./loss_fig/pixAcc_mIoU.pdf')
        plt.savefig('./loss_fig/pixAcc_mIoU.svg')
        plt.close()
    if args.eval:
        return
    new_pred = (pixAcc + mIoU) / 2
    new_preds.append(new_pred)
    plt.plot(new_preds)
    plt.xlabel('10 Epoch')
    plt.ylabel('new_predication')
    plt.title('new_predication')
    plt.grid()
    plt.savefig('./loss_fig/new_predication.pdf')
    plt.savefig('./loss_fig/new_predication.svg')
    plt.close()
    checkpoint_best = new_pred > best_pred
    if checkpoint_best:
        best_pred = new_pred
    utils.save_checkpoint(
        {
            'epoch': epoch + 1,
            'state_dict': model.module.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_pred': best_pred,
        }, args, checkpoint_best,
        filename='checkpoint_train_{}.pth.tar'.format(epoch + 1))
def validation(self, epoch=None):
    """Validation with per-image IoU bookkeeping and auxiliary weather /
    time-of-day head accuracy.

    Dumps per-image inter/union counters to the JSON files ``name2inter`` and
    ``name2union``, logs both auxiliary confusion matrices, and — only when
    ``epoch`` is provided — checkpoints on improvement of (pixAcc + mIoU)/2.
    """
    # Fast test during the training
    def eval_batch(model, image, target, weather, timeofday, scene):
        outputs, weather_o, timeofday_o = model(image)
        # Gathers tensors from different GPUs on a specified device
        # outputs = gather(outputs, 0, dim=0)
        pred = outputs[0]
        # Broadcast each per-image scalar label to a full (h, w) target mask.
        b, _, h, w = weather_o.size()
        weather_t = torch.ones((b, h, w)).long()
        for bi in range(b):
            weather_t[bi] *= weather[bi]
        timeofday_t = torch.ones((b, h, w)).long()
        for bi in range(b):
            timeofday_t[bi] *= timeofday[bi]
        # Accumulate confusion matrices for both auxiliary heads.
        self.confusion_matrix_weather.update([
            m.astype(np.int64) for m in weather_t.numpy()
        ], weather_o.cpu().numpy().argmax(1))
        self.confusion_matrix_timeofday.update([
            m.astype(np.int64) for m in timeofday_t.numpy()
        ], timeofday_o.cpu().numpy().argmax(1))
        correct, labeled = utils.batch_pix_accuracy(pred.data, target)
        inter, union = utils.batch_intersection_union(pred.data, target,
                                                      self.nclass)
        correct_weather, labeled_weather = utils.batch_pix_accuracy(
            weather_o.data, weather_t)
        correct_timeofday, labeled_timeofday = utils.batch_pix_accuracy(
            timeofday_o.data, timeofday_t)
        return correct, labeled, inter, union, correct_weather, labeled_weather, correct_timeofday, labeled_timeofday

    is_best = False
    self.model.eval()
    total_inter, total_union, total_correct, total_label = 0, 0, 0, 0
    total_correct_weather = 0; total_label_weather = 0
    total_correct_timeofday = 0; total_label_timeofday = 0
    name2inter = {}; name2union = {}
    tbar = tqdm(self.valloader, desc='\r')
    for i, (image, target, weather, timeofday, scene, name) in enumerate(tbar):
        if torch_ver == "0.3":
            # Legacy PyTorch 0.3 path: volatile Variables instead of no_grad.
            image = Variable(image, volatile=True)
            correct, labeled, inter, union, correct_weather, labeled_weather, correct_timeofday, labeled_timeofday = eval_batch(
                self.model, image, target, weather, timeofday, scene)
        else:
            with torch.no_grad():
                correct, labeled, inter, union, correct_weather, labeled_weather, correct_timeofday, labeled_timeofday = eval_batch(
                    self.model, image, target, weather, timeofday, scene)
        total_correct += correct
        total_label += labeled
        total_inter += inter
        total_union += union
        # np.spacing(1) guards against division by zero.
        pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
        IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
        mIoU = IoU.mean()
        # Records only name[0] — assumes a validation batch size of 1 here;
        # TODO confirm against the val loader configuration.
        name2inter[name[0]] = inter.tolist()
        name2union[name[0]] = union.tolist()
        total_correct_weather += correct_weather
        total_label_weather += labeled_weather
        pixAcc_weather = 1.0 * total_correct_weather / (
            np.spacing(1) + total_label_weather)
        total_correct_timeofday += correct_timeofday
        total_label_timeofday += labeled_timeofday
        pixAcc_timeofday = 1.0 * total_correct_timeofday / (
            np.spacing(1) + total_label_timeofday)
        tbar.set_description(
            'pixAcc: %.2f, mIoU: %.2f, weather: %.2f, timeofday: %.2f'
            % (pixAcc, mIoU, pixAcc_weather, pixAcc_timeofday))
    self.logger.info(
        'pixAcc: %.3f, mIoU: %.3f, pixAcc_weather: %.3f, pixAcc_timeofday: %.3f'
        % (pixAcc, mIoU, pixAcc_weather, pixAcc_timeofday))
    # Persist the per-image counters for offline analysis.
    with open("name2inter", 'w') as fp:
        json.dump(name2inter, fp)
    with open("name2union", 'w') as fp:
        json.dump(name2union, fp)
    cm = self.confusion_matrix_weather.get_scores()['cm']
    self.logger.info(str(cm))
    self.confusion_matrix_weather.reset()
    cm = self.confusion_matrix_timeofday.get_scores()['cm']
    self.logger.info(str(cm))
    self.confusion_matrix_timeofday.reset()
    if epoch is not None:
        new_pred = (pixAcc + mIoU) / 2
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
        utils.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_pred': self.best_pred,
        }, self.args, is_best)
def training(self, epoch):
    """One training epoch using the original segmentation criterion.

    The large triple-quoted block below sketches an alternative
    JointEdgeSegLoss (edge/seg/att/dual terms). It is an inert, unused
    string literal — kept deliberately as an experiment record; it has no
    runtime effect.
    """
    train_loss = 0.0
    self.model.train()
    tbar = tqdm(self.trainloader)
    for i, (image, target) in enumerate(tbar):
        self.scheduler(self.optimizer, i, epoch, self.best_pred)
        self.optimizer.zero_grad()
        if torch_ver == "0.3":
            image = Variable(image)
            target = Variable(target)
        outputs = self.model(image)
        ## original
        loss = self.criterion(outputs, target)
        loss.backward()
        '''
        ## modified loss
        criterion = JointEdgeSegLoss(classes=num_classes,
            ignore_index=args.dataset_cls.ignore_label, upper_bound=args.wt_bound,
            edge_weight=args.edge_weight, seg_weight=args.seg_weight,
            att_weight=args.att_weight, dual_weight=args.dual_weight)
        train_main_loss = AverageMeter()
        train_edge_loss = AverageMeter()
        train_seg_loss = AverageMeter()
        train_att_loss = AverageMeter()
        train_dual_loss = AverageMeter()
        main_loss = None
        loss_dict = None
        self.criterion((seg_out, edge_out), gts)
        if args.seg_weight > 0:
            log_seg_loss = loss_dict['seg_loss'].mean().clone().detach_()
            train_seg_loss.update(log_seg_loss.item(), batch_pixel_size)
            main_loss = loss_dict['seg_loss']
        if args.edge_weight > 0:
            log_edge_loss = loss_dict['edge_loss'].mean().clone().detach_()
            train_edge_loss.update(log_edge_loss.item(), batch_pixel_size)
            if main_loss is not None:
                main_loss += loss_dict['edge_loss']
            else:
                main_loss = loss_dict['edge_loss']
        if args.att_weight > 0:
            log_att_loss = loss_dict['att_loss'].mean().clone().detach_()
            train_att_loss.update(log_att_loss.item(), batch_pixel_size)
            if main_loss is not None:
                main_loss += loss_dict['att_loss']
            else:
                main_loss = loss_dict['att_loss']
        if args.dual_weight > 0:
            log_dual_loss = loss_dict['dual_loss'].mean().clone().detach_()
            train_dual_loss.update(log_dual_loss.item(), batch_pixel_size)
            if main_loss is not None:
                main_loss += loss_dict['dual_loss']
            else:
                main_loss = loss_dict['dual_loss']
        '''
        self.optimizer.step()
        train_loss += loss.item()
        tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
    if self.args.no_val:
        # save checkpoint every epoch
        is_best = False
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': self.model.module.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, self.args, is_best)
total_label += labeled total_inter += inter total_union += union pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label) IoU = 1.0 * total_inter / (np.spacing(1) + total_union) mIoU = IoU.mean() tbar.set_description( 'pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU)) new_pred = (pixAcc + mIoU)/2 if new_pred > self.best_pred: >>>>>>> upstream/master is_best = True utils.save_checkpoint({ 'epoch': epoch, 'state_dict': self.model.module.state_dict(), 'optimizer': self.optimizer.state_dict(), 'best_loss': self.best_loss, }, args=args, is_best=is_best) def plot(scores, name, args): plt.figure(figsize=(15, 5)) plt.plot(range(len(scores["train"])), scores["train"], label=f'train {name}') plt.plot(range(len(scores["train"])), scores["val"], label=f'val {name}') plt.title(f'{name} plot'); plt.xlabel('Epoch'); plt.ylabel(f'{name}'); plt.legend(); # plt.show() # save results