def validate(val_loader, model, criterion):
    """Run one full pass over the validation set and return aggregate metrics.

    Args:
        val_loader: DataLoader yielding (input, target) batches.
        model: segmentation network; switched to eval() here (caller must
            restore train mode afterwards).
        criterion: per-pixel loss; expected unreduced in the distributed
            branch (it is averaged manually there).

    Returns:
        (loss_avg, mIoU, mAcc, allAcc) computed from the accumulated meters.

    Uses module-level globals: ``args``, ``logger``, ``main_process``,
    ``intersectionAndUnionGPU``, ``dist``.
    """
    # torch.backends.cudnn.enabled = False # for cudnn bug at https://github.com/pytorch/pytorch/issues/4107
    if main_process():
        logger.info('>>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>>')
    # batch_time = AverageMeter()
    # data_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    model.eval()
    # end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            # data_time.update(time.time() - end)
            input = input.cuda(non_blocking=True)
            # input = [input[0].cuda(non_blocking=True), input[1].cuda(non_blocking=True)]
            target = target.cuda(non_blocking=True)
            output = model(input)
            # _, H, W = target.shape
            # output = F.interpolate(output, size=(H, W), mode='bilinear', align_corners=True)
            loss = criterion(output, target)
            n = target.size(0)
            if args.multiprocessing_distributed:
                # Weight the loss by local batch size, sum across ranks, then
                # divide by the global sample count for a true global mean.
                loss = loss * n  # not considering ignore pixels
                count = target.new_tensor([n], dtype=torch.long)
                dist.all_reduce(loss), dist.all_reduce(count)
                n = count.item()
                loss = loss / n
            else:
                loss = torch.mean(loss)
            # Convert logits to hard predictions (argmax over the class dim).
            output = output.detach().max(1)[1]
            # NOTE: from here on, `target` is rebound to the per-class
            # target-area histogram returned by intersectionAndUnionGPU,
            # not the label map.
            intersection, union, target = intersectionAndUnionGPU(
                output, target, args.classes, args.ignore_label)
            if args.multiprocessing_distributed:
                dist.all_reduce(intersection), dist.all_reduce(
                    union), dist.all_reduce(target)
            intersection, union, target = intersection.cpu().numpy(
            ), union.cpu().numpy(), target.cpu().numpy()
            intersection_meter.update(intersection), union_meter.update(
                union), target_meter.update(target)
            accuracy = sum(
                intersection_meter.val) / (sum(target_meter.val) + 1e-10)
            loss_meter.update(loss.item(), n)
            # batch_time.update(time.time() - end)
            # end = time.time()
            # if ((i + 1) % args.print_freq == 0) and main_process():
            #     logger.info('Test: [{}/{}] '
            #                 'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
            #                 'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
            #                 'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f}) '
            #                 'Accuracy {accuracy:.4f}.'.format(i + 1, len(val_loader),
            #                                                   data_time=data_time,
            #                                                   batch_time=batch_time,
            #                                                   loss_meter=loss_meter,
            #                                                   accuracy=accuracy))
    # 1e-10 guards against division by zero for classes absent from the set.
    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info(
            'Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
                mIoU, mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(
                i, iou_class[i], accuracy_class[i]))
        logger.info('<<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<<<<<')
    return loss_meter.avg, mIoU, mAcc, allAcc
def train(gpu, ngpus_per_node, argss):
    """Per-process training entry point (iteration-based loop).

    Spawned once per GPU under multiprocessing-distributed training, or
    called once for DataParallel. Builds the model/optimizer/data pipeline,
    optionally loads a knowledge-distillation teacher and resume state, then
    runs ``args.max_iter`` optimizer steps with poly LR decay, periodic
    validation, and checkpointing.

    Args:
        gpu: local GPU index for this process.
        ngpus_per_node: number of GPUs on this node (used to split batch
            size / workers and to compute the global rank).
        argss: parsed config namespace; stored into the module-level
            ``args`` global.

    Side effects: mutates globals ``args``, ``logger``, ``writer``; creates
    the save directory; writes TensorBoard logs and checkpoints.
    """
    global args
    args = argss
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # Global rank = node rank * GPUs per node + local GPU index.
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    teacher_model = None
    if args.teacher_model_path:
        # Knowledge-distillation teacher; save path is suffixed with the
        # KD hyper-parameters so runs don't overwrite each other.
        teacher_model = PSPNet(layers=args.teacher_layers,
                               classes=args.classes,
                               zoom_factor=args.zoom_factor)
        kd_path = 'alpha_' + str(args.alpha) + '_Temp_' + str(args.temperature)
        args.save_path = os.path.join(args.save_path, kd_path)
        # NOTE(review): exists()+mkdir() is race-prone when several spawned
        # ranks reach this point together — os.makedirs(..., exist_ok=True)
        # would be safer; confirm only rank 0 is expected to create it.
        if not os.path.exists(args.save_path):
            os.mkdir(args.save_path)
    if args.arch == 'dct':
        model = DCTNet(layers=args.layers, classes=args.classes, vec_dim=300)
    # modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4]
    # modules_new = [model.cls, model.aux]
    # DCT4
    modules_ori = [model.cp, model.sp, model.head]
    modules_new = []
    args.index_split = len(
        modules_ori
    )  # the module after index_split need multiply 10 at learning rate
    params_list = []
    for module in modules_ori:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr))
    for module in modules_new:
        # New (non-backbone) modules train with 10x the base learning rate.
        params_list.append(
            dict(params=module.parameters(), lr=args.base_lr * 10))
    # args.index_split = 5
    optimizer = torch.optim.SGD(params_list,
                                lr=args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.sync_bn:
        # Convert BatchNorm to SyncBatchNorm before wrapping in DDP.
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        if teacher_model is not None:
            teacher_model = nn.SyncBatchNorm.convert_sync_batchnorm(
                teacher_model)
    if main_process():
        global logger, writer
        logger = get_logger()
        writer = SummaryWriter(args.save_path)  # tensorboardX
        logger.info(args)
        logger.info("=> creating model ...")
        logger.info("Classes: {}".format(args.classes))
        logger.info(model)
        if teacher_model is not None:
            logger.info(teacher_model)
    if args.distributed:
        torch.cuda.set_device(gpu)
        # Split the global batch/workers evenly across this node's GPUs.
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.batch_size_val = int(args.batch_size_val / ngpus_per_node)
        args.workers = int(
            (args.workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(model.cuda(),
                                                          device_ids=[gpu])
        if teacher_model is not None:
            teacher_model = torch.nn.parallel.DistributedDataParallel(
                teacher_model.cuda(), device_ids=[gpu])
    else:
        model = torch.nn.DataParallel(model.cuda())
        if teacher_model is not None:
            teacher_model = torch.nn.DataParallel(teacher_model.cuda())
    if teacher_model is not None:
        # strict=False: teacher checkpoint may contain extra/missing keys.
        checkpoint = torch.load(
            args.teacher_model_path,
            map_location=lambda storage, loc: storage.cuda())
        teacher_model.load_state_dict(checkpoint['state_dict'], strict=False)
        print("=> loading teacher checkpoint '{}'".format(
            args.teacher_model_path))
    if args.use_ohem:
        # Online hard example mining loss (keeps pixels above the threshold).
        criterion = OhemCELoss(thresh=0.7,
                               ignore_index=args.ignore_label).cuda(gpu)
    else:
        criterion = nn.CrossEntropyLoss(
            ignore_index=args.ignore_label).cuda(gpu)
    kd_criterion = None
    if teacher_model is not None:
        kd_criterion = KDLoss(ignore_index=args.ignore_label).cuda(gpu)
    if args.weight:
        if os.path.isfile(args.weight):
            if main_process():
                logger.info("=> loading weight: '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            model.load_state_dict(checkpoint['state_dict'])
            if main_process():
                logger.info("=> loaded weight '{}'".format(args.weight))
        else:
            if main_process():
                # NOTE(review): "mp weight found" reads like a typo for
                # "no weight found" — confirm before changing the message.
                logger.info("=> mp weight found at '{}'".format(args.weight))
    best_mIoU_val = 0.0
    if args.resume:
        if os.path.isfile(args.resume):
            if main_process():
                logger.info("=> loading checkpoint '{}'".format(args.resume))
            # Load all tensors onto GPU
            checkpoint = torch.load(
                args.resume, map_location=lambda storage, loc: storage.cuda())
            args.start_iter = checkpoint['iteration']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            best_mIoU_val = checkpoint['best_mIoU_val']
            if main_process():
                logger.info("=> loaded checkpoint '{}' (iteration {})".format(
                    args.resume, checkpoint['iteration']))
        else:
            if main_process():
                logger.info("=> no checkpoint found at '{}'".format(
                    args.resume))
    # Normalization statistics are expressed in 0-255 pixel scale.
    value_scale = 255
    ## RGB mean & std
    rgb_mean = [0.485, 0.456, 0.406]
    rgb_mean = [item * value_scale for item in rgb_mean]
    rgb_std = [0.229, 0.224, 0.225]
    rgb_std = [item * value_scale for item in rgb_std]
    # DCT mean & std
    dct_mean = dct_mean_std.train_upscaled_static_mean
    dct_mean = [item * value_scale for item in dct_mean]
    dct_std = dct_mean_std.train_upscaled_static_std
    dct_std = [item * value_scale for item in dct_std]
    train_transform = transform.Compose([
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max],
                             padding=rgb_mean,
                             ignore_label=args.ignore_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w],
                       crop_type='rand',
                       padding=rgb_mean,
                       ignore_label=args.ignore_label),
        # transform.GetDctCoefficient(),
        transform.ToTensor(),
        transform.Normalize(mean=rgb_mean, std=rgb_std)
    ])
    train_data = dataset.SemData(split='train',
                                 img_type='rgb',
                                 data_root=args.data_root,
                                 data_list=args.train_list,
                                 transform=train_transform)
    # train_transform = transform_rgbdct.Compose([
    #     transform_rgbdct.RandScale([args.scale_min, args.scale_max]),
    #     transform_rgbdct.RandRotate([args.rotate_min, args.rotate_max], padding=rgb_mean, ignore_label=args.ignore_label),
    #     transform_rgbdct.RandomGaussianBlur(),
    #     transform_rgbdct.RandomHorizontalFlip(),
    #     transform_rgbdct.Crop([args.train_h, args.train_w], crop_type='rand', padding=rgb_mean, ignore_label=args.ignore_label),
    #     transform_rgbdct.GetDctCoefficient(),
    #     transform_rgbdct.ToTensor(),
    #     transform_rgbdct.Normalize(mean_rgb=rgb_mean, mean_dct=dct_mean, std_rgb=rgb_std, std_dct=dct_std)])
    # train_data = dataset.SemData(split='train', img_type='rgb&dct', data_root=args.data_root, data_list=args.train_list, transform=train_transform)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, \
        shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, \
        sampler=train_sampler, drop_last=True)
    if args.evaluate:
        # val_h = int(args.base_h * args.scale)
        # val_w = int(args.base_w * args.scale)
        val_transform = transform.Compose([
            transform.Crop([args.train_h, args.train_w],
                           crop_type='center',
                           padding=rgb_mean,
                           ignore_label=args.ignore_label),
            # transform.Resize(size=(val_h, val_w)),
            # transform.GetDctCoefficient(),
            transform.ToTensor(),
            transform.Normalize(mean=rgb_mean, std=rgb_std)
        ])
        val_data = dataset.SemData(split='val',
                                   img_type='rgb',
                                   data_root=args.data_root,
                                   data_list=args.val_list,
                                   transform=val_transform)
        # val_transform = transform_rgbdct.Compose([
        #     transform_rgbdct.Crop([args.train_h, args.train_w], crop_type='center', padding=rgb_mean, ignore_label=args.ignore_label),
        #     # transform.Resize(size=(val_h, val_w)),
        #     transform_rgbdct.GetDctCoefficient(),
        #     transform_rgbdct.ToTensor(),
        #     transform_rgbdct.Normalize(mean_rgb=rgb_mean, mean_dct=dct_mean, std_rgb=rgb_std, std_dct=dct_std)])
        # val_data = dataset.SemData(split='val', img_type='rgb&dct', data_root=args.data_root, data_list=args.val_list, transform=val_transform)
        if args.distributed:
            val_sampler = torch.utils.data.distributed.DistributedSampler(
                val_data)
        else:
            val_sampler = None
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, \
            shuffle=False, num_workers=args.workers, pin_memory=True, sampler=val_sampler)
    # Training Loop
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    # aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    # switch to train mode
    model.train()
    if teacher_model is not None:
        teacher_model.eval()
    end = time.time()
    max_iter = args.max_iter
    data_iter = iter(train_loader)
    epoch = 0
    for current_iter in range(args.start_iter, args.max_iter):
        try:
            input, target = next(data_iter)
            # Treat a short tail batch as end-of-epoch so every step uses a
            # full batch (loader also has drop_last=True).
            if not target.size(0) == args.batch_size:
                raise StopIteration
        except StopIteration:
            epoch += 1
            if args.distributed:
                # Reshuffle the distributed sampler each epoch.
                train_sampler.set_epoch(epoch)
                if main_process():
                    logger.info('train_sampler.set_epoch({})'.format(epoch))
            data_iter = iter(train_loader)
            input, target = next(data_iter)
            # need to update the AverageMeter for new epoch
            main_loss_meter = AverageMeter()
            # aux_loss_meter = AverageMeter()
            loss_meter = AverageMeter()
            intersection_meter = AverageMeter()
            union_meter = AverageMeter()
            target_meter = AverageMeter()
        # measure data loading time
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        # input = [input[0].cuda(non_blocking=True), input[1].cuda(non_blocking=True)]
        target = target.cuda(non_blocking=True)
        # compute output
        # main_out, aux_out = model(input)
        main_out = model(input)
        # _, H, W = target.shape
        # main_out = F.interpolate(main_out, size=(H, W), mode='bilinear', align_corners=True)
        main_loss = criterion(main_out, target)
        # aux_loss = criterion(aux_out, target)
        if not args.multiprocessing_distributed:
            # main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss)
            main_loss = torch.mean(main_loss)
        # loss = main_loss + args.aux_weight * aux_loss
        loss = main_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        n = target.size(0)
        # if args.multiprocessing_distributed:
        #     main_loss, aux_loss, loss = main_loss.detach() * n, aux_loss * n, loss * n  # not considering ignore pixels
        #     count = target.new_tensor([n], dtype=torch.long)
        #     dist.all_reduce(main_loss), dist.all_reduce(aux_loss), dist.all_reduce(loss), dist.all_reduce(count)
        #     n = count.item()
        #     main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n
        if args.multiprocessing_distributed:
            # Sum batch-weighted losses across ranks, then renormalize by the
            # global sample count (metrics only; gradients already synced by DDP).
            main_loss, loss = main_loss.detach(
            ) * n, loss * n  # not considering ignore pixels
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(main_loss), dist.all_reduce(loss), dist.all_reduce(
                count)
            n = count.item()
            main_loss, loss = main_loss / n, loss / n
        main_out = main_out.detach().max(1)[1]
        # NOTE: `target` is rebound to the per-class target-area histogram below.
        intersection, union, target = intersectionAndUnionGPU(
            main_out, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection), dist.all_reduce(
                union), dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu(
        ).numpy(), target.cpu().numpy()
        intersection_meter.update(intersection), union_meter.update(
            union), target_meter.update(target)
        accuracy = sum(
            intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        main_loss_meter.update(main_loss.item(), n)
        # aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()
        # Using Poly strategy to change the learning rate
        current_lr = poly_learning_rate(args.base_lr,
                                        current_iter,
                                        max_iter,
                                        power=args.power)
        for index in range(0, args.index_split
                           ):  # args.index_split = 5 -> ResNet has 5 stages
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10
        # Estimate remaining wall-clock time from the running batch average.
        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m),
                                                    int(t_s))
        iter_log = current_iter + 1
        if iter_log % args.print_freq == 0 and main_process():
            logger.info('Iter [{}/{}] '
                        'LR: {lr:.3e}, '
                        'ETA: {remain_time}, '
                        'Data: {data_time.val:.3f} ({data_time.avg:.3f}), '
                        'Batch: {batch_time.val:.3f} ({batch_time.avg:.3f}), '
                        'MainLoss: {main_loss_meter.val:.4f}, '
                        # 'AuxLoss: {aux_loss_meter.val:.4f}, '
                        'Loss: {loss_meter.val:.4f}, '
                        'Accuracy: {accuracy:.4f}.'.format(
                            iter_log, args.max_iter,
                            lr=current_lr,
                            remain_time=remain_time,
                            data_time=data_time,
                            batch_time=batch_time,
                            main_loss_meter=main_loss_meter,
                            # aux_loss_meter=aux_loss_meter,
                            loss_meter=loss_meter,
                            accuracy=accuracy))
        if main_process():
            writer.add_scalar('loss_train_batch', main_loss_meter.val,
                              iter_log)
            writer.add_scalar('mIoU_train_batch',
                              np.mean(intersection / (union + 1e-10)),
                              iter_log)
            writer.add_scalar('mAcc_train_batch',
                              np.mean(intersection / (target + 1e-10)),
                              iter_log)
            writer.add_scalar('allAcc_train_batch', accuracy, iter_log)
        if iter_log % len(
                train_loader
        ) == 0 or iter_log == max_iter:  # for each epoch or the max interation
            iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
            accuracy_class = intersection_meter.sum / (target_meter.sum +
                                                       1e-10)
            mIoU_train = np.mean(iou_class)
            mAcc_train = np.mean(accuracy_class)
            allAcc_train = sum(
                intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
            loss_train = main_loss_meter.avg
            if main_process():
                logger.info('Train result at iteration [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'\
                    .format(iter_log, max_iter, mIoU_train, mAcc_train, allAcc_train))
                writer.add_scalar('loss_train', loss_train, iter_log)
                writer.add_scalar('mIoU_train', mIoU_train, iter_log)
                writer.add_scalar('mAcc_train', mAcc_train, iter_log)
                writer.add_scalar('allAcc_train', allAcc_train, iter_log)
            # if iter_log % args.save_freq == 0:
            is_best = False
            if args.evaluate:
                loss_val, mIoU_val, mAcc_val, allAcc_val = validate(
                    val_loader, model, criterion)
                model.train()  # the mode change from eval() to train()
                if main_process():
                    writer.add_scalar('loss_val', loss_val, iter_log)
                    writer.add_scalar('mIoU_val', mIoU_val, iter_log)
                    writer.add_scalar('mAcc_val', mAcc_val, iter_log)
                    writer.add_scalar('allAcc_val', allAcc_val, iter_log)
                    if best_mIoU_val < mIoU_val:
                        is_best = True
                        best_mIoU_val = mIoU_val
                    logger.info('==>The best val mIoU: %.3f' % (best_mIoU_val))
            if main_process():
                save_checkpoint(
                    {
                        'iteration': iter_log,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'best_mIoU_val': best_mIoU_val
                    }, is_best, args.save_path)
                # NOTE(review): mIoU_val is only bound when args.evaluate is
                # true — this log line would raise NameError otherwise; verify
                # evaluate is always enabled for this script.
                logger.info('Saving checkpoint to:{}/iter_{}.pth or last.pth with mIoU:{:.3f}'\
                    .format(args.save_path, iter_log, mIoU_val))
                if is_best:
                    logger.info('Saving checkpoint to:{}/best.pth with mIoU:{:.3f}'\
                        .format(args.save_path, best_mIoU_val))
    if main_process():
        writer.close(
        )  # it must close the writer, otherwise it will appear the EOFError!
        logger.info(
            '==>Training done! The best val mIoU during training: %.3f' %
            (best_mIoU_val))
def evaluate():
    """Evaluate a trained checkpoint on the validation split.

    Parses config, builds the architecture selected by ``args.arch``, loads
    ``args.model_path`` strictly, runs inference at a fixed resized
    resolution, and logs per-class and overall IoU/accuracy.

    Raises:
        RuntimeError: if ``args.model_path`` does not exist.
    """
    args = get_parser()
    logger = get_logger()
    # Restrict visible devices before any CUDA context is created.
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
        str(x) for x in args.test_gpu)
    logger.info(args)
    logger.info("=> creating model ...")
    logger.info("Classes: {}".format(args.classes))
    # Architecture dispatch table; all variants share layers/classes config.
    if args.arch == 'psp':
        model = PSPNet(layers=args.layers,
                       classes=args.classes,
                       zoom_factor=args.zoom_factor)
    elif args.arch == 'nonlocal':
        model = Nonlocal(layers=args.layers,
                         classes=args.classes,
                         zoom_factor=args.zoom_factor)
    elif args.arch == 'danet':
        model = DANet(layers=args.layers,
                      classes=args.classes,
                      zoom_factor=args.zoom_factor)
    elif args.arch == 'sanet':
        model = SANet(layers=args.layers,
                      classes=args.classes,
                      zoom_factor=args.zoom_factor)
    elif args.arch == 'fanet':
        model = FANet(layers=args.layers, classes=args.classes)
    elif args.arch == 'fftnet':
        model = FFTNet(layers=args.layers, classes=args.classes)
    elif args.arch == 'fftnet_23':
        model = FFTNet23(layers=args.layers, classes=args.classes)
    elif args.arch == 'bise_v1':
        model = BiseNet(layers=args.layers,
                        classes=args.classes,
                        with_sp=args.with_sp)
    elif args.arch == 'dct':
        model = DCTNet(layers=args.layers, classes=args.classes, vec_dim=300)
    elif args.arch == 'triple':
        model = TriSeNet(layers=args.layers, classes=args.classes)
    elif args.arch == 'triple_1':
        model = TriSeNet1(layers=args.layers, classes=args.classes)
    elif args.arch == 'ppm':
        model = PPM_Net(backbone=args.backbone,
                        layers=args.layers,
                        classes=args.classes)
    elif args.arch == 'fc':
        model = FC_Net(backbone=args.backbone,
                       layers=args.layers,
                       classes=args.classes)
    logger.info(model)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    if os.path.isfile(args.model_path):
        logger.info("=> loading checkpoint '{}'".format(args.model_path))
        # checkpoint = torch.load(args.model_path, map_location=torch.device('cpu'))
        checkpoint = torch.load(args.model_path)
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        logger.info("=> loaded checkpoint '{}'".format(args.model_path))
    else:
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.model_path))
    # Normalization statistics are expressed in 0-255 pixel scale.
    value_scale = 255
    ## RGB mean & std
    rgb_mean = [0.485, 0.456, 0.406]
    rgb_mean = [item * value_scale for item in rgb_mean]
    rgb_std = [0.229, 0.224, 0.225]
    rgb_std = [item * value_scale for item in rgb_std]
    # DCT mean & std
    dct_mean = dct_mean_std.train_upscaled_static_mean
    dct_mean = [item * value_scale for item in dct_mean]
    dct_std = dct_mean_std.train_upscaled_static_std
    dct_std = [item * value_scale for item in dct_std]
    # Inference resolution = base size scaled by args.scale.
    val_h = int(args.base_h * args.scale)
    val_w = int(args.base_w * args.scale)
    val_transform = transform.Compose([
        transform.Resize(size=(val_h, val_w)),
        # transform.GetDctCoefficient(),
        transform.ToTensor(),
        transform.Normalize(mean=rgb_mean, std=rgb_std)
    ])
    val_data = dataset.SemData(split='val',
                               img_type='rgb',
                               data_root=args.data_root,
                               data_list=args.val_list,
                               transform=val_transform)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True)
    # val_transform = transform.Compose([
    #     # transform.Resize(size=(val_h, val_w)),
    #     transform.GetDctCoefficient(),
    #     transform.ToTensor(),
    #     transform.Normalize(mean_rgb=rgb_mean, mean_dct=dct_mean, std_rgb=rgb_std, std_dct=dct_std)])
    # val_data = dataset.SemData(split='val', img_type='rgb&dct', data_root=args.data_root, data_list=args.val_list, transform=val_transform)
    # val_loader = torch.utils.data.DataLoader(val_data, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)
    # test_transform = transform.Compose([
    #     transform.ToTensor(),
    #     transform.Normalize(mean=mean, std=std)])
    # # test_transform = transform.Compose([transform.ToTensor()])
    # test_data = dataset.SemData(split='test', data_root=args.data_root, data_list=args.test_list, transform=test_transform)
    # val_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)
    logger.info('>>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>>')
    batch_time = AverageMeter()
    data_time = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    model.eval()
    end = time.time()
    results = []
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            data_time.update(time.time() - end)
            # _, _, H, W = input.shape
            input = input.cuda(non_blocking=True)
            # input = [input[0].cuda(non_blocking=True), input[1].cuda(non_blocking=True)]
            target = target.cuda(non_blocking=True)
            # if args.scale != 1.0:
            #     input = F.interpolate(input, size=(val_h, val_w), mode='bilinear', align_corners=True)
            # SANet trained with a teacher returns extra outputs; keep only
            # the segmentation logits.
            if args.teacher_model_path != None and args.arch == 'sanet':
                output, _, _ = model(input)
            else:
                output = model(input)
            # if args.scale != 1.0:
            #     output = F.interpolate(output, size=(H, W), mode='bilinear', align_corners=True)
            _, H, W = target.shape
            # output = F.interpolate(output, size=(H, W), mode='bilinear', align_corners=True)
            output = output.detach().max(1)[1]
            # Assumes batch_size=1: prediction is reshaped to a single (H, W) map.
            results.append(output.cpu().numpy().reshape(H, W))
            # NOTE: `target` is rebound to the per-class target-area histogram below.
            intersection, union, target = intersectionAndUnionGPU(
                output, target, args.classes, args.ignore_label)
            intersection, union, target = intersection.cpu().numpy(
            ), union.cpu().numpy(), target.cpu().numpy()
            intersection_meter.update(intersection), union_meter.update(
                union), target_meter.update(target)
            accuracy = sum(
                intersection_meter.val) / (sum(target_meter.val) + 1e-10)
            batch_time.update(time.time() - end)
            end = time.time()
            if ((i + 1) % 10 == 0) or (i + 1 == len(val_loader)):
                logger.info(
                    'Val: [{}/{}] '
                    'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                    'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                    'Accuracy {accuracy:.4f}.'.format(i + 1, len(val_loader),
                                                      data_time=data_time,
                                                      batch_time=batch_time,
                                                      accuracy=accuracy))
    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    logger.info('Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
        mIoU, mAcc, allAcc))
    for i in range(args.classes):
        logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(
            i, iou_class[i], accuracy_class[i]))
    logger.info('<<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<<<<<')
def train(local_rank, train_loader, model, teacher_model, criterion,
          kd_criterion, optimizer, epoch):
    """Train for one epoch, optionally with knowledge distillation.

    Args:
        local_rank: local GPU index used for `.cuda(local_rank, ...)` placement.
        train_loader: DataLoader yielding (input, target) batches.
        model: student network producing (main_out, aux_out).
        teacher_model: optional frozen teacher; when given, the main loss is
            the KD loss against its outputs.
        criterion: per-pixel supervised loss (used for aux head, and for the
            main head when no teacher is present).
        kd_criterion: distillation loss taking (student, target, teacher).
        optimizer: SGD optimizer with backbone groups first (see
            ``args.index_split``).
        epoch: zero-based epoch index (used for the global iteration count).

    Returns:
        (main_loss_avg, mIoU, mAcc, allAcc) for the epoch.

    Uses module-level globals: ``args``, ``logger``, ``writer``,
    ``main_process``, ``poly_learning_rate``, ``intersectionAndUnionGPU``.
    """
    # torch.backends.cudnn.enabled = True
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    # switch to train mode
    model.train()
    if teacher_model is not None:
        teacher_model.eval()
    end = time.time()
    max_iter = args.epochs * len(
        train_loader)  # initialize for poly learning rate
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        if args.zoom_factor != 8:
            # Resize labels to the model's output stride (8 is the native one).
            h = int((target.size()[1] - 1) / 8 * args.zoom_factor + 1)
            w = int((target.size()[2] - 1) / 8 * args.zoom_factor + 1)
            # 'nearest' mode doesn't support align_corners mode and 'bilinear' mode is fine for downsampling
            target = F.interpolate(target.unsqueeze(1).float(),
                                   size=(h, w),
                                   mode='bilinear',
                                   align_corners=True).squeeze(1).long()
        input = input.cuda(local_rank, non_blocking=True)
        target = target.cuda(local_rank, non_blocking=True)
        main_out, aux_out = model(input)
        if teacher_model is not None:
            with torch.no_grad():
                teacher_out = teacher_model(input)
            main_loss = kd_criterion(main_out,
                                     target,
                                     teacher_out,
                                     alpha=args.alpha,
                                     temperature=args.temperature)
            del teacher_out  # delete the teacher_out for releasing the gpu memory.
        else:
            main_loss = criterion(main_out, target)
        aux_loss = criterion(aux_out, target)
        if not args.multiprocessing_distributed:
            main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss)
        loss = main_loss + args.aux_weight * aux_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        n = input.size(0)
        if args.multiprocessing_distributed:
            # Sum batch-weighted losses across ranks, then renormalize by the
            # global sample count (metrics only; gradients already synced by DDP).
            main_loss, aux_loss, loss = main_loss.detach(
            ) * n, aux_loss * n, loss * n  # not considering ignore pixels
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(main_loss), dist.all_reduce(
                aux_loss), dist.all_reduce(loss), dist.all_reduce(count)
            n = count.item()
            main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n
        main_out = main_out.detach().max(1)[1]
        # NOTE: `target` is rebound to the per-class target-area histogram below.
        intersection, union, target = intersectionAndUnionGPU(
            main_out, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection), dist.all_reduce(
                union), dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu(
        ).numpy(), target.cpu().numpy()
        intersection_meter.update(intersection), union_meter.update(
            union), target_meter.update(target)
        accuracy = sum(
            intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()
        # Poly LR schedule over the whole training run; new (non-backbone)
        # parameter groups get 10x the base rate.
        current_iter = epoch * len(train_loader) + i + 1
        current_lr = poly_learning_rate(args.base_lr,
                                        current_iter,
                                        max_iter,
                                        power=args.power)
        for index in range(
                0, args.index_split
        ):  # args.index_split = 5 -> ResNet has 5 stages
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10
        # Estimate remaining wall-clock time from the running batch average.
        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m),
                                                    int(t_s))
        if (i + 1) % args.print_freq == 0 and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'MainLoss {main_loss_meter.val:.4f} '
                        'AuxLoss {aux_loss_meter.val:.4f} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(
                            epoch + 1, args.epochs, i + 1, len(train_loader),
                            data_time=data_time,
                            batch_time=batch_time,
                            remain_time=remain_time,
                            main_loss_meter=main_loss_meter,
                            aux_loss_meter=aux_loss_meter,
                            loss_meter=loss_meter,
                            accuracy=accuracy))
        if main_process():
            writer.add_scalar('loss_train_batch', main_loss_meter.val,
                              current_iter)
            writer.add_scalar('mIoU_train_batch',
                              np.mean(intersection / (union + 1e-10)),
                              current_iter)
            writer.add_scalar('mAcc_train_batch',
                              np.mean(intersection / (target + 1e-10)),
                              current_iter)
            writer.add_scalar('allAcc_train_batch', accuracy, current_iter)
    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info(
            'Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'
            .format(epoch + 1, args.epochs, mIoU, mAcc, allAcc))
    return main_loss_meter.avg, mIoU, mAcc, allAcc
def evaluate():
    """Run a trained TriSeNet checkpoint over the TRAIN split and export
    prediction images.

    Loads ``args.model_path`` strictly, evaluates every training image at
    native resolution (batch size 1), prints per-class IoU/accuracy, then
    converts the collected predictions to label-ID images via
    ``results2img`` under ``./visualization/train_result``.

    Raises:
        RuntimeError: if ``args.model_path`` does not exist.
    """
    args = get_parser()
    # Restrict visible devices before any CUDA context is created.
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
        str(x) for x in args.test_gpu)
    # NOTE(review): `model` is only assigned for arch == 'triple'; any other
    # arch would hit NameError below — confirm this script is triple-only.
    if args.arch == 'triple':
        model = TriSeNet(layers=args.layers, classes=args.classes)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    if os.path.isfile(args.model_path):
        print("=> loading checkpoint '{}'".format(args.model_path))
        # checkpoint = torch.load(args.model_path, map_location=torch.device('cpu'))
        checkpoint = torch.load(args.model_path)
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        print("=> loaded checkpoint '{}'".format(args.model_path))
    else:
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.model_path))
    # Normalization statistics are expressed in 0-255 pixel scale.
    value_scale = 255
    ## RGB mean & std
    rgb_mean = [0.485, 0.456, 0.406]
    rgb_mean = [item * value_scale for item in rgb_mean]
    rgb_std = [0.229, 0.224, 0.225]
    rgb_std = [item * value_scale for item in rgb_std]
    val_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize(mean=rgb_mean, std=rgb_std)
    ])
    # Evaluation runs over the TRAIN list (predictions are exported for it).
    train_data = SemData(split='train',
                         data_root=args.data_root,
                         data_list=args.train_list,
                         transform=val_transform)
    val_loader = torch.utils.data.DataLoader(train_data, batch_size=1, \
        shuffle=False, num_workers=args.workers, pin_memory=True)
    print('>>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>>')
    batch_time = AverageMeter()
    data_time = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    model.eval()
    end = time.time()
    results = []
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            data_time.update(time.time() - end)
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            output = model(input)
            _, H, W = target.shape
            output = output.detach().max(1)[1]
            # Assumes batch_size=1: prediction reshaped to a single (H, W) map.
            results.append(output.cpu().numpy().reshape(H, W))
            # NOTE: `target` is rebound to the per-class target-area histogram below.
            intersection, union, target = intersectionAndUnionGPU(
                output, target, args.classes, args.ignore_label)
            intersection, union, target = intersection.cpu().numpy(
            ), union.cpu().numpy(), target.cpu().numpy()
            intersection_meter.update(intersection), union_meter.update(
                union), target_meter.update(target)
            accuracy = sum(
                intersection_meter.val) / (sum(target_meter.val) + 1e-10)
            batch_time.update(time.time() - end)
            end = time.time()
            print('Val: [{}/{}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Accuracy {accuracy:.4f}.'.format(i + 1, len(val_loader),
                                                    data_time=data_time,
                                                    batch_time=batch_time,
                                                    accuracy=accuracy))
    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    print('Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
        mIoU, mAcc, allAcc))
    for i in range(args.classes):
        print('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(
            i, iou_class[i], accuracy_class[i]))
    print('<<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<<<<<')
    print('Convert to Label ID')
    result_files = results2img(results=results,
                               data_root=args.data_root,
                               data_list=args.train_list,
                               save_dir='./visualization/train_result',
                               to_label_id=True)
    print('Convert to Label ID Finished')
def train(gpu, ngpus_per_node, argss):
    """Iteration-based training loop for TriSeNet on a single node.

    Builds the model, an SGD optimizer with a 10x learning rate for the new
    (non-backbone) modules, the augmentation pipeline and the data loader,
    then trains for args.max_iter iterations with a poly learning-rate decay,
    printing progress every args.print_freq iterations and saving a checkpoint
    as it goes.

    Args:
        gpu: GPU index used to place the loss criterion.
        ngpus_per_node: unused here (kept for the mp.spawn launcher signature).
        argss: parsed config namespace; stored into the module-level `args`.
    """
    global args
    args = argss
    if args.arch == 'triple':
        model = TriSeNet(layers=args.layers, classes=args.classes)
    # Backbone (ResNet) stages keep the base learning rate...
    modules_ori = [
        model.layer0, model.layer1, model.layer2, model.layer3, model.layer4
    ]
    # modules_new = [model.down_8_32, model.sa_8_32, model.seg_head]
    # ...while every submodule NOT named "layer*" is treated as newly added.
    modules_new = []
    for key, value in model._modules.items():
        if "layer" not in key:
            modules_new.append(value)
    args.index_split = len(
        modules_ori
    )  # the module after index_split need multiply 10 at learning rate
    # One param group per module so the poly-LR update below can address
    # backbone groups and new-module groups separately by index.
    params_list = []
    for module in modules_ori:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr))
    for module in modules_new:
        params_list.append(
            dict(params=module.parameters(), lr=args.base_lr * 10))
    optimizer = torch.optim.SGD(params_list,
                                lr=args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    print("=> creating model ...")
    print("Classes: {}".format(args.classes))
    print(model)
    model = torch.nn.DataParallel(model.cuda())
    cudnn.benchmark = True
    # ignore_index keeps void pixels out of the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label).cuda(gpu)

    # ImageNet RGB mean/std scaled to the 0-255 pixel range.
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]
    train_transform = transform.Compose([
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max],
                             padding=mean,
                             ignore_label=args.ignore_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w],
                       crop_type='rand',
                       padding=mean,
                       ignore_label=args.ignore_label),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ])
    train_data = SemData(split='train',
                         data_root=args.data_root,
                         data_list=args.train_list,
                         transform=train_transform)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None,
                                               drop_last=True)

    # Training Loop
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()
    model.train()
    end = time.time()
    max_iter = args.max_iter
    data_iter = iter(train_loader)
    epoch = 0  # epoch counter only; not otherwise used below
    for current_iter in range(args.start_iter, args.max_iter):
        try:
            input, target = next(data_iter)
            # A short final batch (loader exhausted) also restarts the epoch.
            if not input.size(0) == args.batch_size:
                raise StopIteration
        except StopIteration:
            epoch += 1
            data_iter = iter(train_loader)
            input, target = next(data_iter)
            # need to update the AverageMeter for new epoch
            main_loss_meter = AverageMeter()
            aux_loss_meter = AverageMeter()
            loss_meter = AverageMeter()
            intersection_meter = AverageMeter()
            union_meter = AverageMeter()
            target_meter = AverageMeter()

        # measure data loading time
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        main_out = model(input)
        main_loss = criterion(main_out, target)
        # Auxiliary loss is disabled (constant zero); kept so the logging and
        # args.aux_weight plumbing stay in place.
        aux_loss = torch.tensor(0).cuda()
        loss = main_loss + args.aux_weight * aux_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n = input.size(0)
        main_out = main_out.detach().max(1)[1]  # argmax -> predicted labels
        # `target` is intentionally rebound to the per-class target counts.
        intersection, union, target = intersectionAndUnionGPU(
            main_out, target, args.classes, args.ignore_label)
        intersection, union, target = intersection.cpu().numpy(), union.cpu(
        ).numpy(), target.cpu().numpy()
        intersection_meter.update(intersection), union_meter.update(
            union), target_meter.update(target)
        accuracy = sum(
            intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        # Using Poly strategy to change the learning rate; applied after the
        # step, so it takes effect from the next iteration.
        current_lr = poly_learning_rate(args.base_lr,
                                        current_iter,
                                        max_iter,
                                        power=args.power)
        for index in range(0, args.index_split
                           ):  # args.index_split = 5 -> ResNet has 5 stages
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10

        # ETA estimate from the running average batch time.
        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m),
                                                    int(t_s))

        iter_log = current_iter + 1
        if iter_log % args.print_freq == 0:
            print('Iteration: [{}/{}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'ETA {remain_time} '
                  'MainLoss {main_loss_meter.val:.4f} '
                  'AuxLoss {aux_loss_meter.val:.4f} '
                  'Loss {loss_meter.val:.4f} '
                  'Accuracy {accuracy:.4f}.'.format(
                      iter_log,
                      args.max_iter,
                      data_time=data_time,
                      batch_time=batch_time,
                      remain_time=remain_time,
                      main_loss_meter=main_loss_meter,
                      aux_loss_meter=aux_loss_meter,
                      loss_meter=loss_meter,
                      accuracy=accuracy))
        # NOTE(review): as written this overwrites the checkpoint every
        # iteration (is_best=False) — confirm the intended save frequency.
        save_checkpoint(
            {
                'iteration': iter_log,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, False, args.save_path)