def train(train_loader, model, criteria, optimizer, device, batch_size):
    """Run one training epoch and return epoch-level metrics.

    Args:
        train_loader: iterable of (image, mask) batches.
        model: segmentation network to train (set to train mode here).
        criteria: loss callable, criteria(logits, mask) -> scalar tensor.
        optimizer: optimizer stepping the model's parameters.
        device: torch device the batches are moved to.
        batch_size: nominal batch size, used only to weight the loss meter.

    Returns:
        dict with keys "loss", "pixel acc", "mean acc", "miou".
    """
    model.train()
    evaluator = Evaluator(21)  # 21 classes — PASCAL-VOC-style setup
    evaluator.reset()
    train_loss = AverageMeter("Loss", ":.4")
    progress = ProgressMeter(len(train_loader), train_loss)
    for i, (image, mask) in enumerate(train_loader):
        image = image.to(device)
        mask = mask.to(device)
        output = model(image)
        loss = criteria(output, mask)
        # Hard predictions: argmax over the class axis of the logits.
        predict = output.data.cpu().numpy()
        predict = np.argmax(predict, axis=1)
        target = mask.cpu().numpy()
        evaluator.add_batch(target, predict)
        train_loss.update(loss.item(), batch_size)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            progress.print(i)
    # BUG FIX: the original called evaluator.add_batch(target, predict) once
    # more here, counting the final batch TWICE in every epoch metric (and
    # raising NameError on an empty loader). Removed.
    info = {"loss": train_loss.val,  # NOTE(review): .val is the LAST batch's loss; .avg may be intended — confirm AverageMeter API
            "pixel acc": evaluator.Pixel_Accuracy(),
            "mean acc": evaluator.Pixel_Accuracy_Class(),
            "miou": evaluator.Mean_Intersection_over_Union()}
    return info
def predict(): net = UNet(n_channels=3, n_classes=2) # net = ULeNet(n_channels=3, n_classes=6) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') net.to(device=device) net.load_state_dict(torch.load(model, map_location=device)) evaluator = Evaluator(num_class=2) evaluator.reset() for img in os.listdir(dir_img): image = cv2.imread(os.path.join(dir_img, img)) # mask = cv2.imread(os.path.join(dir_mask, img)) # mask = cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY) mask = Image.open(os.path.join(dir_mask, img)) mask = mask.convert('L') mask = np.array(mask) mask_class = np.unique(mask) for i in range(len(mask_class)): mask[mask == mask_class[i]] = i predict = np.zeros((image.shape[0], image.shape[1])) p_size = 128 for i in range(0, image.shape[0] - p_size, p_size): for j in range(0, image.shape[1] - p_size, p_size): patch = image[i:i + p_size, j:j + p_size, :] patch = Normalization(patch) predict[i:i + p_size, j:j + p_size] = getPredict(net=net, img=patch, device=device) evaluator.add_batch(mask, predict) # mIoU = evaluator.Mean_Intersection_over_Union() predict = Image.fromarray((predict).astype(np.uint8)) predict.save( os.path.join(dir_predict, os.path.splitext(img)[0] + '.tif')) show_confMat(evaluator.conf_mat, [str(c) for c in range(2)], re.split('[/.]', model)[1], dir_confmat)
def eval_net(net, data_loader, device):
    """Validate ``net`` over ``data_loader`` (8-class segmentation).

    Computes per-batch cross-entropy loss, pixel accuracy, mean IoU and
    frequency-weighted IoU, each averaged over the number of batches.
    Restores the network to train mode before returning.

    Returns:
        (mean_loss, mean_pixel_acc, mean_iou, mean_fw_iou)
    """
    net.eval()
    val_batch_num = len(data_loader)
    eval_loss = 0
    e = Evaluator(num_class=8)
    pixel_acc_avg = 0
    mean_iou_avg = 0
    fw_iou_avg = 0
    with tqdm(total=val_batch_num, desc='Validation round', unit='batch',
              leave=False) as pbar:
        for idx, batch_samples in enumerate(data_loader):
            batch_image, batch_mask = batch_samples["image"], batch_samples[
                "mask"]
            batch_image = batch_image.to(device=device, dtype=torch.float32)
            mask_true = batch_mask.to(device=device, dtype=torch.long)
            with torch.no_grad():
                mask_pred = net(batch_image)  # [B, 8, H, W] logits
            # BUG FIX: the original computed
            #   F.softmax(mask_pred, dim=1).squeeze(0)
            # and then argmax(dim=1). With batch_size == 1 the squeeze drops
            # the batch axis, so dim=1 indexes a SPATIAL axis and the
            # predictions are garbage (the author's "#????" flagged this).
            # Softmax is monotonic, so argmax over the raw logits is
            # identical for B > 1 and correct for B == 1.
            pre = torch.argmax(mask_pred, dim=1)  # [B, H, W]
            e.add_batch(mask_true.cpu().data.numpy(), pre.cpu().data.numpy())
            pixel_acc = e.Pixel_Accuracy()
            pixel_acc_avg += pixel_acc
            mean_iou = e.Mean_Intersection_over_Union()
            mean_iou_avg += mean_iou
            fw_iou = e.Frequency_Weighted_Intersection_over_Union()
            fw_iou_avg += fw_iou
            eval_loss += F.cross_entropy(mask_pred, mask_true).item()
            pbar.set_postfix({'eval_loss': eval_loss / (idx + 1)})
            pbar.update()
            # Reset so each batch contributes an independent confusion matrix
            # to the running averages.
            # NOTE(review): placement reconstructed from mangled source —
            # confirm the reset was per-batch, not once after the loop.
            e.reset()
    print("pixel_acc_avg:" + str(pixel_acc_avg / val_batch_num))
    print("mean_iou_avg:" + str(mean_iou_avg / val_batch_num))
    print("fw_iou_avg:" + str(fw_iou_avg / val_batch_num))
    net.train()
    return eval_loss / val_batch_num, pixel_acc_avg / val_batch_num, mean_iou_avg / val_batch_num, fw_iou_avg / val_batch_num
def validate(val_loader, model, criteria, device, batch_size):
    """Evaluate the model for one pass over the validation loader.

    Args:
        val_loader: iterable of (image, mask) batches.
        model: segmentation network (switched to eval mode here).
        criteria: loss callable, criteria(logits, mask) -> scalar tensor.
        device: torch device the batches are moved to.
        batch_size: unused; kept for signature compatibility with train().

    Returns:
        dict with keys "loss", "pixel acc", "mean acc", "miou".
    """
    model.eval()
    evaluator = Evaluator(21)  # 21 classes, matching the training evaluator
    evaluator.reset()
    losses = []
    with torch.no_grad():
        for image, mask in tqdm(val_loader):
            image, mask = image.to(device), mask.to(device)
            logits = model(image)
            losses.append(criteria(logits, mask).item())
            # Hard predictions via argmax over the class axis.
            hard_pred = np.argmax(logits.data.cpu().numpy(), axis=1)
            evaluator.add_batch(mask.cpu().numpy(), hard_pred)
    return {
        "loss": sum(losses) / len(val_loader),
        "pixel acc": evaluator.Pixel_Accuracy(),
        "mean acc": evaluator.Pixel_Accuracy_Class(),
        "miou": evaluator.Mean_Intersection_over_Union(),
    }
def main(cfg, gpu, args):
    """Evaluate a segmentation model over every video in the chosen split.

    Builds encoder/decoder from ``cfg``, runs ``test`` per video while
    accumulating a global and a per-video ``Evaluator``, dumps per-video
    mIoU values to ``vmiou_hr.pkl`` and prints split-level metrics.
    """
    num_class = args.num_class
    torch.cuda.set_device(gpu)

    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder,
        fc_dim=cfg.MODEL.fc_dim,
        num_class=num_class,
        weights=cfg.MODEL.weights_decoder,
        use_softmax=True)
    crit = nn.NLLLoss(ignore_index=-1)
    segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
    segmentation_module.cuda(args.start_gpu)

    with open(os.path.join(args.dataroot, args.split + '.txt')) as f:
        lines = f.readlines()
        videolists = [line[:-1] for line in lines]  # strip trailing newlines

    # Dataset and Loader
    evaluator = Evaluator(num_class)   # accumulates over the whole split
    eval_video = Evaluator(num_class)  # reset before each video
    evaluator.reset()
    eval_video.reset()
    total_vmIOU = 0.0
    total_vfwIOU = 0.0
    total_video = len(videolists)
    v = []  # per-video mIoU values (dumped to vmiou_hr.pkl)
    n = []  # matching video names
    for video in videolists:
        eval_video.reset()
        dataset_test = TestDataset(args.dataroot, video, args)
        loader_test = torch.utils.data.DataLoader(
            dataset_test,
            batch_size=args.batchsize,
            shuffle=False,
            num_workers=5,
            drop_last=False)
        # Main loop
        test(segmentation_module, loader_test, gpu, args, evaluator,
             eval_video, video)
        v_mIOU = eval_video.Mean_Intersection_over_Union()
        # BUG FIX: the original did v.append(v) — appending the list to
        # itself, so the per-video score was never recorded and the pickle
        # below held a self-referential list.
        v.append(v_mIOU)
        n.append(video)
        print(video, v_mIOU)
        total_vmIOU += v_mIOU
        v_fwIOU = eval_video.Frequency_Weighted_Intersection_over_Union()
        total_vfwIOU += v_fwIOU
    with open("vmiou_hr.pkl", 'wb') as f:
        pkl.dump([v, n], f)
    total_vmIOU = total_vmIOU / total_video
    total_vfwIOU = total_vfwIOU / total_video
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}, video mIOU: {}, video fwIOU: {}".format(
        Acc, Acc_class, mIoU, FWIoU, total_vmIOU, total_vfwIOU))
    print('Inference done!')
def main(cfg, gpu, args):
    """Run clip-based video-segmentation inference over a split.

    Builds one of several method-specific segmentation modules, loads a
    DataParallel checkpoint (stripping the ``module.`` prefix), evaluates
    every video with global and per-video Evaluators, and reports split
    metrics plus a video-consistency (VC) accuracy.
    """
    if args.lesslabel:
        num_class = 42  # reduced label set
    else:
        num_class = args.num_class
    torch.cuda.set_device(gpu)

    # Network Builders — td4_psp is self-contained; all other methods share
    # an encoder/decoder pair built from cfg.
    if args.method == 'tdnet':
        segmentation_module = td4_psp(args=args, backbone='resnet18')
    else:
        net_encoder = ModelBuilder.build_encoder(arch=cfg.MODEL.arch_encoder,
                                                 fc_dim=cfg.MODEL.fc_dim,
                                                 weights='')
        net_decoder = ModelBuilder.build_decoder(arch=cfg.MODEL.arch_decoder,
                                                 fc_dim=cfg.MODEL.fc_dim,
                                                 num_class=num_class,
                                                 weights='',
                                                 use_softmax=True)
        crit = nn.NLLLoss(ignore_index=-1)
        # Dispatch on the requested temporal-segmentation method.
        if args.method == 'netwarp':
            segmentation_module = NetWarp(net_encoder, net_decoder, crit, args,
                                          cfg.TRAIN.deep_sup_scale)
        elif args.method == 'ETC':
            segmentation_module = ETC(net_encoder, net_decoder, crit, args,
                                      cfg.TRAIN.deep_sup_scale)
        elif args.method == 'nonlocal3d':
            segmentation_module = Non_local3d(args, net_encoder, crit)
        elif args.method == 'our_warp':
            segmentation_module = ClipWarpNet(net_encoder, net_decoder, crit, args)
        elif args.method == 'propnet':
            segmentation_module = PropNet(net_encoder, net_decoder, crit, args)
        elif args.method == 'our_warp_merge':
            segmentation_module = OurWarpMerge(net_encoder, net_decoder, crit, args)
        elif args.method == 'clip_psp':
            segmentation_module = Clip_PSP(net_encoder, crit, args)
        elif args.method == 'clip_ocr':
            segmentation_module = ClipOCRNet(net_encoder, crit, args)
        elif args.method == 'netwarp_ocr':
            segmentation_module = NetWarp_ocr(net_encoder, crit, args)
        elif args.method == 'etc_ocr':
            segmentation_module = ETC_ocr(net_encoder, crit, args)
        else:
            raise NotImplementedError
    segmentation_module.cuda(args.start_gpu)

    to_load = torch.load(args.load,
                         map_location=torch.device("cuda:" + str(args.start_gpu)))
    new_state_dict = OrderedDict()
    for k, v in to_load.items():
        name = k[7:]  # strip the 'module.' prefix DataParallel adds when saving
        new_state_dict[name] = v  # keep the value under the renamed key
    segmentation_module.load_state_dict(new_state_dict)
    if args.gpu_num > 1:
        # Consecutive device ids starting at start_gpu.
        train_gpu_ = list(range(args.gpu_num))
        train_gpu_ = [int(gpu_ + args.start_gpu) for gpu_ in train_gpu_]
        print(train_gpu_)
        segmentation_module = torch.nn.DataParallel(segmentation_module,
                                                    device_ids=train_gpu_)
        patch_replication_callback(segmentation_module)
    with open(os.path.join(args.dataroot, args.split + '.txt')) as f:
        lines = f.readlines()
        videolists = [line[:-1] for line in lines]  # strip trailing newlines

    # Dataset and Loader
    evaluator = Evaluator(num_class)   # accumulates over the whole split
    eval_video = Evaluator(num_class)  # reset before each video
    evaluator.reset()
    eval_video.reset()
    total_vmIOU = 0.0
    total_vfwIOU = 0.0
    total_video = len(videolists)
    total_VC_acc = []  # per-clip video-consistency accuracies
    for video in videolists:
        eval_video.reset()
        # Long-clip datasets for the clip_* methods, short clips otherwise.
        if args.method == 'clip_psp' or args.method == 'clip_ocr':
            test_dataset = TestDataset_longclip(args.dataroot, video, args,
                                                is_train=False)
        else:
            test_dataset = TestDataset_clip(args.dataroot, video, args,
                                            is_train=False)
        loader_test = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=args.batchsize,
                                                  shuffle=False,
                                                  num_workers=0,
                                                  drop_last=False)
        ####
        if args.method == 'nonlocal3d':
            gtlist_, predlist_, h, w = test_all(segmentation_module,
                                                loader_test, gpu, args,
                                                evaluator, eval_video, video)
        else:
            gtlist_, predlist_, h, w = test(segmentation_module, loader_test,
                                            gpu, args, evaluator, eval_video,
                                            video)
        # Video-consistency accuracy over vc_clip_num-frame windows.
        accs = get_common(gtlist_, predlist_, args.vc_clip_num, h, w)
        print(sum(accs) / len(accs))
        total_VC_acc.extend(accs)
        ####
        v_mIOU = eval_video.Mean_Intersection_over_Union()
        total_vmIOU += v_mIOU
        v_fwIOU = eval_video.Frequency_Weighted_Intersection_over_Union()
        print(video, v_mIOU)
        total_vfwIOU += v_fwIOU
    total_vmIOU = total_vmIOU / total_video
    total_vfwIOU = total_vfwIOU / total_video
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print(
        "Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}, video mIOU: {}, video fwIOU: {}"
        .format(Acc, Acc_class, mIoU, FWIoU, total_vmIOU, total_vfwIOU))
    # nanmean: windows with no valid pixels yield NaN and are ignored.
    VC_Acc = np.array(total_VC_acc)
    VC_Acc = np.nanmean(VC_Acc)
    print("Video Consistency num :{} acc:{}".format(args.vc_clip_num, VC_Acc))
    print('Inference done!')
"""Score saved predictions against ground-truth masks for a val split.

Usage: python this_script.py <DATASET_DIR> <PREDICTIONS_DIR>
"""
import numpy as np
import os
from PIL import Image
from utils import Evaluator
import sys

eval_ = Evaluator(124)  # 124-class label space
eval_.reset()
DIR = sys.argv[1]
split = 'val.txt'
with open(os.path.join(DIR, split), 'r') as f:
    lines = f.readlines()
# BUG FIX: the original wrapped this comprehension in `for line in lines:`,
# rebuilding the identical list once per input line (accidental O(n^2)).
videolist = [line[:-1] for line in lines]  # strip trailing newlines
PRED = sys.argv[2]
for video in videolist:
    for tar in os.listdir(os.path.join(DIR, 'data', video, 'mask')):
        pred = os.path.join(PRED, video, tar)
        # Add a leading batch axis to both arrays for Evaluator.add_batch.
        tar_ = Image.open(os.path.join(DIR, 'data', video, 'mask', tar))
        tar_ = np.array(tar_)
        tar_ = tar_[np.newaxis, :]
        pred_ = Image.open(pred)
        pred_ = np.array(pred_)
        pred_ = pred_[np.newaxis, :]
        eval_.add_batch(tar_, pred_)
Acc = eval_.Pixel_Accuracy()
Acc_class = eval_.Pixel_Accuracy_Class()
mIoU = eval_.Mean_Intersection_over_Union()
# BUG FIX: the metrics were computed and then silently discarded.
print("Acc:{}, Acc_class:{}, mIoU:{}".format(Acc, Acc_class, mIoU))
def main(cfg, gpu, args):
    """Run per-video inference over a split and report segmentation metrics.

    Loads a DataParallel checkpoint (stripping the ``module.`` prefix),
    evaluates each video with a global and a per-video ``Evaluator``, and
    prints split-level metrics. Metric computation is skipped for the
    'test' split, which has no ground truth.
    """
    num_class = args.num_class
    torch.cuda.set_device(gpu)

    # Network Builders
    net_encoder = ModelBuilder.build_encoder(arch=cfg.MODEL.arch_encoder,
                                             fc_dim=cfg.MODEL.fc_dim,
                                             weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(arch=cfg.MODEL.arch_decoder,
                                             fc_dim=cfg.MODEL.fc_dim,
                                             num_class=num_class,
                                             weights=cfg.MODEL.weights_decoder,
                                             use_softmax=True)
    crit = nn.NLLLoss(ignore_index=-1)
    segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
    to_load = torch.load(args.load,
                         map_location=torch.device("cuda:" + str(args.start_gpu)))
    new_state_dict = OrderedDict()
    for k, v in to_load.items():
        name = k[7:]  # strip the 'module.' prefix DataParallel adds when saving
        new_state_dict[name] = v  # keep the value under the renamed key
    segmentation_module.load_state_dict(new_state_dict)
    print('load model parameters')
    segmentation_module.cuda(args.start_gpu)

    with open(os.path.join(args.dataroot, args.split + '.txt')) as f:
        lines = f.readlines()
        videolists = [line[:-1] for line in lines]  # strip trailing newlines

    # Dataset and Loader
    evaluator = Evaluator(num_class)   # accumulates over the whole split
    eval_video = Evaluator(num_class)  # reset before each video
    evaluator.reset()
    eval_video.reset()
    total_vmIOU = 0.0
    total_vfwIOU = 0.0
    total_video = len(videolists)
    v = []  # per-video mIoU values
    n = []  # matching video names
    for video in videolists:
        eval_video.reset()
        dataset_test = TestDataset(args.dataroot, video, args)
        loader_test = torch.utils.data.DataLoader(dataset_test,
                                                  batch_size=args.batchsize,
                                                  shuffle=False,
                                                  num_workers=5,
                                                  drop_last=False)
        # Main loop
        test(segmentation_module, loader_test, gpu, args, evaluator,
             eval_video, video)
        if args.split != 'test':  # test split has no ground truth
            v_mIOU = eval_video.Mean_Intersection_over_Union()
            # BUG FIX: the original did v.append(v) — appending the list to
            # itself, so the per-video score was never recorded.
            v.append(v_mIOU)
            n.append(video)
            print(video, v_mIOU)
            total_vmIOU += v_mIOU
            v_fwIOU = eval_video.Frequency_Weighted_Intersection_over_Union()
            total_vfwIOU += v_fwIOU
    if args.split != 'test':
        total_vmIOU = total_vmIOU / total_video
        total_vfwIOU = total_vfwIOU / total_video
        Acc = evaluator.Pixel_Accuracy()
        Acc_class = evaluator.Pixel_Accuracy_Class()
        mIoU = evaluator.Mean_Intersection_over_Union()
        FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
        print(
            "Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}, video mIOU: {}, video fwIOU: {}"
            .format(Acc, Acc_class, mIoU, FWIoU, total_vmIOU, total_vfwIOU))
    print('Inference done!')