# Imports inferred from how the snippets below use them. Project-specific modules and globals
# (cityscapes, joint_transforms, extended_transforms, PSPNet, FCN8s, VGGNet, args, train_args,
# ckpt_path, exp_name, writer, evaluate, AverageMeter, check_mkdir) are assumed to be provided
# by the surrounding repository.
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as standard_transforms
import torchvision.transforms as transforms
import torchvision.utils as vutils
from PIL import Image
from torch.autograd import Variable
from torch.utils.data import DataLoader


def predict_image(net, img_transform, args, img):
    """Using the network generated from "setup_net(...)", make a prediction on the input image.

    Arguments:
        net {pytorch model} -- Ideally the network generated from "setup_net(...)".
        img_transform {transform} -- Output from setup_net.
        args {Args} -- Arguments for the network from setup_net.
        img {numpy.array} -- Input image.

    Returns:
        [np.array, np.array] -- Colorized & non-colorized predictions respectively.
    """
    img_tensor = img_transform(img)
    with torch.no_grad():
        img = img_tensor.unsqueeze(0).cuda()
        pred = net(img)
    pred = pred.cpu().numpy().squeeze()
    pred = np.argmax(pred, axis=0)  # per-pixel class indices

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    colorized = cityscapes.colorize_mask(pred)
    o = np.array(colorized.convert('RGB'))
    o = o[:, :, ::-1].copy()  # RGB -> BGR
    return o, pred
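# Minimal usage sketch for predict_image (not part of the original source). It assumes a
# hypothetical setup_net() helper returning (net, img_transform, args) as described in the
# docstring above, and OpenCV for image I/O; names and values are illustrative only.
def _demo_predict_image(frame_path='example_frame.png'):
    import cv2  # assumed available; used only for this demo
    net, img_transform, args = setup_net()  # hypothetical helper, not defined in this file
    frame = cv2.imread(frame_path)  # numpy image array as read by OpenCV
    colorized_bgr, class_mask = predict_image(net, img_transform, args, frame)
    cv2.imwrite(os.path.join(args.save_dir, 'demo_colorized.png'), colorized_bgr)
    return class_mask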
def test(img_path):
    net = PSPNet(num_classes=cityscapes.num_classes)
    if torch.cuda.is_available():
        # Wrapping with DataParallel lets state dicts saved from multi-GPU training load correctly.
        net = nn.DataParallel(net)
    print('loading model ' + args['snapshot'] + '...')
    model_state_path = os.path.join(ckpt_path, args['exp_name'], args['snapshot'])
    net.load_state_dict(torch.load(model_state_path))
    net.cuda()
    net.eval()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    val_input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])

    img = Image.open(img_path).convert('RGB')
    img = val_input_transform(img)
    img.unsqueeze_(0)

    with torch.no_grad():
        img = img.cuda()
        output = net(img)

    prediction = output.data.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()

    test_dir = os.path.join(ckpt_path, args['exp_name'], 'test')
    img_name = os.path.splitext(os.path.basename(img_path))[0]
    print(img_name)

    predictions_pil = cityscapes.colorize_mask(prediction)
    if args['val_save_to_img_file']:
        check_mkdir(test_dir)
        predictions_pil.save(os.path.join(test_dir, '%s_prediction.png' % img_name))
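# The functions in this file read configuration from module-level names (args, train_args,
# ckpt_path, exp_name, writer) defined elsewhere in the project. The dict below is a hedged
# illustration of the keys consumed by the test-time code above and the sliding-crop test_
# further down; every value is a placeholder, not the project's actual setting.
example_args = {
    'exp_name': 'cityscapes-psp_net',              # sub-directory of ckpt_path holding checkpoints
    'snapshot': 'epoch_0_iter_0_placeholder.pth',  # checkpoint file name to load
    'val_save_to_img_file': True,                  # whether colorized predictions are written to disk
    'crop_size': 713,                              # sliding-crop window size (see test_ below)
    'stride_rate': 2 / 3.,                         # overlap ratio between neighbouring crops
    'longer_size': 2048,                           # longer side of the full frame
    'input_size': (256, 512),                      # (h, w) used by the FCN8s test loader in main()
}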
def validate(val_loader, net, criterion, optimizer, epoch, train_args, restore, visualize):
    net.eval()

    val_loss = AverageMeter()
    inputs_all, gts_all, predictions_all = [], [], []

    with torch.no_grad():
        for vi, data in enumerate(val_loader):
            inputs, gts = data
            N = inputs.size(0)
            inputs = inputs.cuda()
            gts = gts.cuda()

            outputs = net(inputs)
            predictions = outputs.data.max(1)[1].squeeze_(1).cpu().numpy()

            val_loss.update(criterion(outputs, gts).item() / N, N)

            # keep only a random subset of the inputs for visualization
            for i in inputs:
                if random.random() > train_args['val_img_sample_rate']:
                    inputs_all.append(None)
                else:
                    inputs_all.append(i.data.cpu())
            gts_all.append(gts.data.cpu().numpy())
            predictions_all.append(predictions)

    gts_all = np.concatenate(gts_all)
    predictions_all = np.concatenate(predictions_all)

    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, cityscapes.num_classes)

    if mean_iu > train_args['best_record']['mean_iu']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
        snapshot_name = 'epoch_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
            epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc, optimizer.param_groups[1]['lr'])
        torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
        torch.save(optimizer.state_dict(), os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))

        if train_args['val_save_to_img_file']:
            to_save_dir = os.path.join(ckpt_path, exp_name, str(epoch))
            check_mkdir(to_save_dir)

        val_visual = []
        for idx, data in enumerate(zip(inputs_all, gts_all, predictions_all)):
            if data[0] is None:
                continue
            input_pil = restore(data[0])
            gt_pil = cityscapes.colorize_mask(data[1])
            predictions_pil = cityscapes.colorize_mask(data[2])
            if train_args['val_save_to_img_file']:
                input_pil.save(os.path.join(to_save_dir, '%d_input.png' % idx))
                predictions_pil.save(os.path.join(to_save_dir, '%d_prediction.png' % idx))
                gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
            val_visual.extend([visualize(input_pil.convert('RGB')), visualize(gt_pil.convert('RGB')),
                               visualize(predictions_pil.convert('RGB'))])
        val_visual = torch.stack(val_visual, 0)
        val_visual = vutils.make_grid(val_visual, nrow=3, padding=5)
        writer.add_image(snapshot_name, val_visual)

    print('-----------------------------------------------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'], train_args['best_record']['acc_cls'],
        train_args['best_record']['mean_iu'], train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))
    print('-----------------------------------------------------------------------------------------------------------')

    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)
    writer.add_scalar('lr', optimizer.param_groups[1]['lr'], epoch)

    net.train()
    return val_loss.avg
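# The evaluate() called by the validate/test functions in this file comes from the project's
# utils and is not shown here. The sketch below is an assumption: a standard confusion-matrix
# implementation consistent with how evaluate is called (returns acc, acc_cls, mean_iu, fwavacc
# for per-pixel predictions and ground-truth label maps). The project's own version may differ.
def _fast_hist(label_pred, label_true, num_classes):
    # accumulate a num_classes x num_classes confusion matrix, indexed [true, pred]
    mask = (label_true >= 0) & (label_true < num_classes)
    return np.bincount(
        num_classes * label_true[mask].astype(int) + label_pred[mask],
        minlength=num_classes ** 2).reshape(num_classes, num_classes)


def evaluate_sketch(predictions, gts, num_classes):
    hist = np.zeros((num_classes, num_classes))
    for lp, lt in zip(predictions, gts):
        hist += _fast_hist(lp.flatten(), lt.flatten(), num_classes)
    acc = np.diag(hist).sum() / hist.sum()                                   # overall pixel accuracy
    acc_cls = np.nanmean(np.diag(hist) / hist.sum(axis=1))                   # mean per-class accuracy
    iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
    mean_iu = np.nanmean(iu)                                                 # mean intersection-over-union
    freq = hist.sum(axis=1) / hist.sum()
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()                          # frequency-weighted IoU
    return acc, acc_cls, mean_iu, fwavacc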
def validate(val_loader, net, criterion, optimizer, epoch, iter_num, train_args, visualize):
    net.eval()

    val_loss = AverageMeter()
    gts_all = np.zeros((len(val_loader), int(args['longer_size'] / 2), int(args['longer_size'])), dtype=int)
    predictions_all = np.zeros((len(val_loader), int(args['longer_size'] / 2), int(args['longer_size'])), dtype=int)

    with torch.no_grad():
        for vi, data in enumerate(val_loader):
            input, gt, slices_info = data
            assert len(input.size()) == 5 and len(gt.size()) == 4 and len(slices_info.size()) == 3
            input.transpose_(0, 1)
            gt.transpose_(0, 1)
            slices_info.squeeze_(0)
            assert input.size()[3:] == gt.size()[2:]

            count = torch.zeros(int(args['longer_size'] / 2), args['longer_size'])  # .cuda()
            output = torch.zeros(cityscapes.num_classes, int(args['longer_size'] / 2), args['longer_size'])  # .cuda()

            slice_batch_pixel_size = input.size(1) * input.size(3) * input.size(4)

            for input_slice, gt_slice, info in zip(input, gt, slices_info):
                input_slice = Variable(input_slice)  # .cuda()
                gt_slice = Variable(gt_slice)  # .cuda()

                output_slice = net(input_slice)
                assert output_slice.size()[2:] == gt_slice.size()[1:]
                assert output_slice.size()[1] == cityscapes.num_classes
                output[:, info[0]:info[1], info[2]:info[3]] += output_slice[0, :, :info[4], :info[5]].data
                gts_all[vi, info[0]:info[1], info[2]:info[3]] += gt_slice[0, :info[4], :info[5]].data.cpu().numpy()
                count[info[0]:info[1], info[2]:info[3]] += 1

                val_loss.update(criterion(output_slice, gt_slice).item(), slice_batch_pixel_size)

            output /= count
            # integer (floor) division: ground truth was accumulated over overlapping slices above
            gts_all[vi, :, :] //= count.cpu().numpy().astype(int)
            predictions_all[vi, :, :] = output.max(0)[1].squeeze_(0).cpu().numpy()

            print('validating: %d / %d' % (vi + 1, len(val_loader)))

    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, cityscapes.num_classes)

    if val_loss.avg < train_args['best_record']['val_loss']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['iter'] = iter_num
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
    snapshot_name = 'epoch_%d_iter_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
        epoch, iter_num, val_loss.avg, acc, acc_cls, mean_iu, fwavacc, optimizer.param_groups[1]['lr'])
    torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
    torch.save(optimizer.state_dict(), os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))

    if train_args['val_save_to_img_file']:
        to_save_dir = os.path.join(ckpt_path, exp_name, '%d_%d' % (epoch, iter_num))
        check_mkdir(to_save_dir)

    val_visual = []
    for idx, data in enumerate(zip(gts_all, predictions_all)):
        gt_pil = cityscapes.colorize_mask(data[0])
        predictions_pil = cityscapes.colorize_mask(data[1])
        if train_args['val_save_to_img_file']:
            predictions_pil.save(os.path.join(to_save_dir, '%d_prediction.png' % idx))
            gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
        val_visual.extend([visualize(gt_pil.convert('RGB')), visualize(predictions_pil.convert('RGB'))])
    val_visual = torch.stack(val_visual, 0)
    val_visual = torchvision.utils.make_grid(val_visual, nrow=2, padding=5)
    writer.add_image(snapshot_name, val_visual)

    print('-----------------------------------------------------------------------------------------------------------')
    print('[epoch %d], [iter %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, iter_num, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d], '
          '[iter %d]' % (train_args['best_record']['val_loss'], train_args['best_record']['acc'],
                         train_args['best_record']['acc_cls'], train_args['best_record']['mean_iu'],
                         train_args['best_record']['fwavacc'], train_args['best_record']['epoch'],
                         train_args['best_record']['iter']))
    print('-----------------------------------------------------------------------------------------------------------')

    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)

    net.train()
    return val_loss.avg
def validate(val_loader, net, criterion, optimizer, epoch, iter_num, train_args, visualize):
    # the following code is written assuming that the batch size is 1
    net.eval()

    val_loss = AverageMeter()
    gts_all = np.zeros((len(val_loader), args['longer_size'] // 2, args['longer_size']), dtype=int)
    predictions_all = np.zeros((len(val_loader), args['longer_size'] // 2, args['longer_size']), dtype=int)

    with torch.no_grad():
        for vi, data in enumerate(val_loader):
            input, gt, slices_info = data
            assert len(input.size()) == 5 and len(gt.size()) == 4 and len(slices_info.size()) == 3
            input.transpose_(0, 1)
            gt.transpose_(0, 1)
            slices_info.squeeze_(0)
            assert input.size()[3:] == gt.size()[2:]

            count = torch.zeros(args['longer_size'] // 2, args['longer_size']).cuda()
            output = torch.zeros(cityscapes.num_classes, args['longer_size'] // 2, args['longer_size']).cuda()

            slice_batch_pixel_size = input.size(1) * input.size(3) * input.size(4)

            for input_slice, gt_slice, info in zip(input, gt, slices_info):
                input_slice = input_slice.cuda()
                gt_slice = gt_slice.cuda()

                output_slice = net(input_slice)
                assert output_slice.size()[2:] == gt_slice.size()[1:]
                assert output_slice.size()[1] == cityscapes.num_classes
                output[:, info[0]:info[1], info[2]:info[3]] += output_slice[0, :, :info[4], :info[5]].data
                gts_all[vi, info[0]:info[1], info[2]:info[3]] += gt_slice[0, :info[4], :info[5]].data.cpu().numpy()
                count[info[0]:info[1], info[2]:info[3]] += 1

                val_loss.update(criterion(output_slice, gt_slice).item(), slice_batch_pixel_size)

            output /= count
            # integer (floor) division: ground truth was accumulated over overlapping slices above
            gts_all[vi, :, :] //= count.cpu().numpy().astype(int)
            predictions_all[vi, :, :] = output.max(0)[1].squeeze_(0).cpu().numpy()

            print('validating: %d / %d' % (vi + 1, len(val_loader)))

    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, cityscapes.num_classes)

    if val_loss.avg < train_args['best_record']['val_loss']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['iter'] = iter_num
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
    snapshot_name = 'epoch_%d_iter_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
        epoch, iter_num, val_loss.avg, acc, acc_cls, mean_iu, fwavacc, optimizer.param_groups[1]['lr'])
    torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
    torch.save(optimizer.state_dict(), os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))

    if train_args['val_save_to_img_file']:
        to_save_dir = os.path.join(ckpt_path, exp_name, '%d_%d' % (epoch, iter_num))
        check_mkdir(to_save_dir)

    val_visual = []
    for idx, data in enumerate(zip(gts_all, predictions_all)):
        gt_pil = cityscapes.colorize_mask(data[0])
        predictions_pil = cityscapes.colorize_mask(data[1])
        if train_args['val_save_to_img_file']:
            predictions_pil.save(os.path.join(to_save_dir, '%d_prediction.png' % idx))
            gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
        val_visual.extend([visualize(gt_pil.convert('RGB')), visualize(predictions_pil.convert('RGB'))])
    val_visual = torch.stack(val_visual, 0)
    val_visual = vutils.make_grid(val_visual, nrow=2, padding=5)
    writer.add_image(snapshot_name, val_visual)

    print('-----------------------------------------------------------------------------------------------------------')
    print('[epoch %d], [iter %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, iter_num, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d], '
          '[iter %d]' % (train_args['best_record']['val_loss'], train_args['best_record']['acc'],
                         train_args['best_record']['acc_cls'], train_args['best_record']['mean_iu'],
                         train_args['best_record']['fwavacc'], train_args['best_record']['epoch'],
                         train_args['best_record']['iter']))
    print('-----------------------------------------------------------------------------------------------------------')

    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)

    net.train()
    return val_loss.avg
def main():
    torch.backends.cudnn.benchmark = True
    os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    vgg_model = VGGNet(requires_grad=True, remove_fc=True)
    net = FCN8s(pretrained_net=vgg_model, n_class=cityscapes.num_classes, dropout_rate=0.4)
    print('load model ' + args['snapshot'])

    vgg_model = vgg_model.to(device)
    net = net.to(device)
    if torch.cuda.device_count() > 1:
        net = nn.DataParallel(net)
    net.load_state_dict(torch.load(os.path.join(ckpt_path, args['exp_name'], args['snapshot'])))
    net.eval()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    short_size = int(min(args['input_size']) / 0.875)
    val_joint_transform = joint_transforms.Compose([
        joint_transforms.Scale(short_size),
        joint_transforms.CenterCrop(args['input_size'])
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(*mean_std)
    ])
    target_transform = extended_transforms.MaskToTensor()
    restore_transform = transforms.Compose([
        extended_transforms.DeNormalize(*mean_std),
        transforms.ToPILImage()
    ])

    test_set = cityscapes.CityScapes('test', joint_transform=val_joint_transform,
                                     transform=test_transform, target_transform=target_transform)
    test_loader = DataLoader(test_set, batch_size=1, num_workers=8, shuffle=False)

    check_mkdir(os.path.join(ckpt_path, args['exp_name'], 'test'))

    gts_all, predictions_all = [], []
    for vi, data in enumerate(test_loader):
        img_name, img, gts = data
        img_name = img_name[0].split('/')[-1]

        # save the de-normalized input image alongside its prediction
        img_restored = restore_transform(img[0])
        img_restored.save(os.path.join(ckpt_path, args['exp_name'], 'test', img_name))
        img_name = img_name.split('_leftImg8bit.png')[0]

        img, gts = img.to(device), gts.to(device)
        with torch.no_grad():
            output = net(img)
        prediction = output.data.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()

        prediction_img = cityscapes.colorize_mask(prediction)
        prediction_img.save(os.path.join(ckpt_path, args['exp_name'], 'test', img_name + '.png'))
        print('%d / %d' % (vi + 1, len(test_loader)))

        gts_all.append(gts.data.cpu().numpy())
        predictions_all.append(prediction)

    gts_all = np.concatenate(gts_all)           # (N, H, W)
    predictions_all = np.stack(predictions_all)  # (N, H, W), matching gts_all

    acc, acc_cls, mean_iu, _ = evaluate(predictions_all, gts_all, cityscapes.num_classes)
    print('-----------------------------------------------------------------------------------------------------------')
    print('[acc %.5f], [acc_cls %.5f], [mean_iu %.5f]' % (acc, acc_cls, mean_iu))
def test_(img_path):
    net = PSPNet(num_classes=cityscapes.num_classes)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # DataParallel splits the batch along dim 0 across the available GPUs.
        net = nn.DataParallel(net)
    print('load model ' + args['snapshot'])
    net.load_state_dict(torch.load(os.path.join(ckpt_path, args['exp_name'], args['snapshot'])))
    net.cuda()
    net.eval()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    val_input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    sliding_crop = joint_transforms.SlidingCrop(args['crop_size'], args['stride_rate'], cityscapes.ignore_label)

    img = Image.open(img_path).convert('RGB')
    img_slices, _, slices_info = sliding_crop(img, img.copy())
    img_slices = [val_input_transform(e) for e in img_slices]
    img = torch.stack(img_slices, 0)  # [num_slices, C, H, W]

    count = torch.zeros(args['longer_size'] // 2, args['longer_size']).cuda()
    output = torch.zeros(cityscapes.num_classes, args['longer_size'] // 2, args['longer_size']).cuda()
    prediction = np.zeros((args['longer_size'] // 2, args['longer_size']), dtype=int)

    with torch.no_grad():
        for input_slice, info in zip(img, slices_info):
            input_slice = input_slice.unsqueeze(0).cuda()  # add the batch dimension
            output_slice = net(input_slice)
            assert output_slice.size()[1] == cityscapes.num_classes
            output[:, info[0]:info[1], info[2]:info[3]] += output_slice[0, :, :info[4], :info[5]].data
            count[info[0]:info[1], info[2]:info[3]] += 1

    output /= count
    prediction[:, :] = output.max(0)[1].squeeze_(0).cpu().numpy()

    test_dir = os.path.join(ckpt_path, args['exp_name'], 'test')
    img_name = os.path.splitext(os.path.basename(img_path))[0]
    print(img_name)

    prediction_pil = cityscapes.colorize_mask(prediction)
    if train_args['val_save_to_img_file']:
        check_mkdir(test_dir)
        prediction_pil.save(os.path.join(test_dir, '%s_prediction.png' % img_name))
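# AverageMeter and check_mkdir are used throughout this file but come from the surrounding
# project's utils and are not defined in this section. The definitions below are hedged,
# minimal sketches consistent with how they are called here; the project's own versions may
# differ in detail.
class AverageMeter(object):
    """Keeps a weighted running average; update(val, n) records a value observed over n samples."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / max(self.count, 1)


def check_mkdir(dir_name):
    """Creates dir_name (and missing parents) if it does not exist yet."""
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)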