def train(args):
    """Train the ``Classifier`` on FCN-8s outputs and optionally evaluate it.

    Fields read from ``args``: ``feature_model_path``,
    ``classifier_model_path`` (``None`` to start from fresh weights),
    ``train_csv_path`` and ``test_csv_path`` (``None`` skips that phase),
    ``batch_size``, ``num_epoch`` and ``output_model_path``.
    Everything runs on the CPU.
    """
    device = "cpu"

    # Setup model: FCN-8s restored from the checkpoint, put in eval mode.
    model = get_model({"arch": "fcn8s"}, N_CLASSES,
                      version="mit_sceneparsing_benchmark")
    feature_ckpt = torch.load(args.feature_model_path, map_location='cpu')
    model.load_state_dict(convert_state_dict(feature_ckpt["model_state"]))
    model.eval()
    model.to(device)

    # Setup classifier, optionally warm-started from saved weights.
    classifier = Classifier()
    if args.classifier_model_path is not None:
        classifier_state = torch.load(args.classifier_model_path,
                                      map_location='cpu')
        classifier.load_state_dict(classifier_state)
    classifier.to(device)

    # NOTE(review): momentum=True is coerced to 1.0 by SGD, which is probably
    # not what was intended -- confirm the desired momentum value.
    optimizer = optim.SGD(classifier.parameters(), lr=0.001, momentum=True)

    if args.train_csv_path is not None:
        print("Read training csv file from : {}".format(args.train_csv_path))
        train_data = read_samples(args.train_csv_path, args.batch_size)
        for _epoch in range(args.num_epoch):
            for img, label in train_data:
                train_step(model, classifier, optimizer, img, label)
        torch.save(classifier.state_dict(), args.output_model_path)

    if args.test_csv_path is not None:
        classifier.eval()
        print("Read testing csv file from : {}".format(args.test_csv_path))
        # A batch size of 999 is requested and only the first batch is scored.
        test_data = read_samples(args.test_csv_path, 999)
        first_images, first_labels = test_data[0][0], test_data[0][1]
        # `eval` here is a project-level evaluation routine, not the builtin.
        eval(model, classifier, first_images, first_labels)
def test(args):
    """Segment a single image and save the raw class-index mask.

    Fields read from ``args``: ``model_path`` (the architecture name is the
    file-name prefix before the first ``_``), ``img_path``, ``dataset``,
    ``img_norm`` and ``out_path``.

    Changes over the previous version: the unused extra bicubic resize was
    dropped, the checkpoint is mapped onto the selected device so the CPU
    fallback can load GPU-saved weights, and the forward pass runs without
    autograd bookkeeping.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]
    # These architectures get an odd-sized input and a resize back afterwards.
    odd_size_arch = model_name in ["pspnet", "icnet", "icnetBN"]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None, is_transform=True,
                         img_norm=args.img_norm, test_mode=True)
    n_classes = loader.n_classes

    orig_size = img.shape[:-1]
    if odd_size_arch:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(
            img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))

    img = img[:, :, ::-1]  # reverse the channel axis
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img.astype(float) / 255.0

    # NHWC -> NCHW
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    checkpoint = torch.load(args.model_path, map_location=device)
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    images = img.to(device)
    with torch.no_grad():
        outputs = model(images)
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)

    if odd_size_arch:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, "nearest", mode="F")

    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, pred.astype('uint8'))
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def validate(args):
    """Score a checkpoint on one dataset split and print the metrics.

    Fields read from ``args``: ``dataset``, ``split``, ``img_rows``,
    ``img_cols``, ``batch_size`` and ``model_path``. The architecture name
    passed to ``get_model`` is ``model_path`` up to its first ``_``.

    The model is now moved to the GPU once before the loop instead of on
    every batch.
    """
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, split=args.split, is_transform=True,
                         img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=args.batch_size,
                                num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(args.model_path[:args.model_path.find('_')], n_classes)
    state = convert_state_dict(torch.load(args.model_path)['model_state'])
    model.load_state_dict(state)
    model.eval()
    model.cuda()  # loop-invariant: do it once

    for _, (images, labels) in tqdm(enumerate(valloader)):
        # volatile=True is kept for the legacy Variable API this block uses.
        images = Variable(images.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True)

        outputs = model(images)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.data.cpu().numpy()
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    for i in range(n_classes):
        print(i, class_iou[i])
def inicializar_segsem():
    """Initialise the module-level ``loader``, ``device`` and ``model``.

    Builds the ADE20K loader in test mode, constructs a ``hardnet`` model,
    restores it from a fixed checkpoint path, moves it to CUDA in eval mode,
    and prints how long the setup took.
    """
    global loader
    global device
    global model

    print("Loading Semantic Segmentation Model:")
    t_begin = time.time()

    device = torch.device("cuda")
    arch_name = "hardnet"

    loader_cls = get_loader("ade20k")
    loader = loader_cls(root=None, is_transform=True, img_norm=True,
                        test_mode=True)
    n_classes = loader.n_classes

    # Setup Model
    model = get_model({"arch": arch_name}, n_classes, version="ade20k")
    checkpoint = torch.load(
        "/home/socialab/FCHarDNet/runs/config./cur/hardnet_ade20k_best_model.pkl",
    )
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    t_finish = time.time()
    print(" (time): " + str(t_finish - t_begin))
def train(args):
    """Train a segmentation model with SGD and save it after every epoch.

    Fields read from ``args``: ``dataset``, ``config_file``, ``img_rows``,
    ``img_cols``, ``batch_size``, ``arch``, ``log_dir``, ``l_rate``,
    ``n_epoch`` and ``feature_scale``.

    Checkpoints are written inside ``args.log_dir`` as
    ``{arch}_{dataset}_{feature_scale}_{epoch}.pt``. The path is now built
    with ``os.path.join`` so a ``log_dir`` without a trailing separator no
    longer produces a sibling file named ``<log_dir><arch>_...``.
    """
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset, config_file=args.config_file)
    loader = data_loader(data_path, is_transform=True,
                         img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    # must use 1 worker for AWS sagemaker without ipc="host" or larger shared memory size
    trainloader = data.DataLoader(loader, batch_size=args.batch_size,
                                  num_workers=1, shuffle=True)

    # Setup Model
    model = get_model(args.arch, n_classes)

    # Setup log dir / logging (exist_ok avoids the check-then-create race)
    os.makedirs(args.log_dir, exist_ok=True)
    configure(args.log_dir)

    model = torch.nn.DataParallel(
        model, device_ids=range(torch.cuda.device_count()))
    model.cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate,
                                momentum=0.9, weight_decay=5e-4)

    step = 0
    for epoch in range(args.n_epoch):
        start_time = time.time()
        for i, (images, labels) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())

            optimizer.zero_grad()
            outputs = model(images)
            loss = cross_entropy2d(outputs, labels)
            loss.backward()
            optimizer.step()

            # Legacy scalar access, kept for the Variable-era API used here.
            loss_value = loss.data[0]
            log_value('Loss', loss_value, step)
            step += 1

            if (i + 1) % 20 == 0:
                print("Epoch [%d/%d] Loss: %.4f"
                      % (epoch + 1, args.n_epoch, loss_value), flush=True)

        end_time = time.time()
        print('Epoch run time: %s' % (end_time - start_time))

        checkpoint_name = "{}_{}_{}_{}.pt".format(
            args.arch, args.dataset, args.feature_scale, epoch)
        torch.save(model, os.path.join(args.log_dir, checkpoint_name))
def test(args, cfg):
    """Segment every ``*.png`` in a folder and report a confidence score.

    For each image (natural-sorted by path) this prints the mean of the
    per-pixel maximum softmax probability and writes the colour-decoded
    prediction to ``test_out/test_confidence/out/<image name>``.

    Fields read from ``args``: ``img_path`` (a directory), ``dataset``,
    ``img_norm`` and ``model_path``. ``cfg`` supplies ``cfg['model']`` and is
    forwarded to ``get_data_path``.

    Changes over the previous version: softmax is taken over the explicit
    class dimension (``dim=1``, the same axis as the argmax below and the
    implicit default for 4-D input), inference runs under ``torch.no_grad``,
    the checkpoint is mapped onto the selected device, and unused locals
    were removed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    sorted_files = natsort.natsorted(
        list(Path(args.img_path).glob("*.png")), alg=natsort.PATH)
    image_paths = [str(p) for p in sorted_files]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset, config_file=cfg)
    loader = data_loader(data_path, is_transform=True,
                         img_norm=args.img_norm)
    n_classes = loader.n_classes

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    checkpoint = torch.load(args.model_path, map_location=device)
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    with torch.no_grad():
        for img_path in tqdm(image_paths):
            img = misc.imread(img_path)
            img = img.astype(np.float64)
            if args.img_norm:
                img = img.astype(float) / 255.0

            # NHWC -> NCHW
            img = img.transpose(2, 0, 1)
            img = np.expand_dims(img, 0)
            images = torch.from_numpy(img).float().to(device)

            outputs = model(images)

            # Confidence: mean over pixels of the winning class probability.
            outputs_probability = F.softmax(outputs, dim=1)
            prob = outputs_probability.data.max(1)[0]
            prob_img_format = np.squeeze(prob.cpu().numpy(), axis=0)
            avg_prob = np.mean(prob_img_format)
            print("Confidence Score for %s: \n%f" % (img_path, avg_prob))

            pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
            decoded = loader.decode_segmap(pred)
            out_path = "test_out/test_confidence/out/" + Path(img_path).name
            decoded_bgr = cv.cvtColor(decoded, cv.COLOR_RGB2BGR)
            cv.imwrite(out_path, decoded_bgr)
def test(args, cfg):
    """Run the model over the validation split and save colour masks.

    Each prediction is decoded with ``loader.decode_segmap`` and written to
    ``test_out/CAN_res50_4band_data07/<image stem>.png``.

    Fields read from ``args``: ``img_path`` (only echoed), ``dataset``,
    ``img_norm`` and ``model_path``. ``cfg`` supplies the ``data``,
    ``training`` and ``model`` sections.

    Changes over the previous version: every item of a batch is saved.
    Previously ``np.squeeze(..., axis=0)`` raised for batch sizes above one
    and only the first path of the batch was used. The unused file listing
    of ``args.img_path`` was also removed.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset, config_file=cfg)
    # This loader instance is used for n_classes and for decoding masks.
    loader = data_loader(data_path, is_transform=True,
                         img_norm=args.img_norm)
    n_classes = loader.n_classes

    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['val_split'],
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
    )
    valloader = data.DataLoader(v_loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=cfg['training']['n_workers'])

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    state = convert_state_dict(torch.load(args.model_path)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    with torch.no_grad():
        for _, (img_path, images_val, labels_val) in tqdm(enumerate(valloader)):
            images_val = images_val.to(device)
            outputs = model(images_val)
            # Leading axis is the batch; the argmax is taken over axis 1.
            batch_pred = outputs.data.max(1)[1].cpu().numpy()

            for img_name, pred in zip(img_path, batch_pred):
                decoded = loader.decode_segmap(pred)
                out_path = ("test_out/CAN_res50_4band_data07/"
                            + Path(img_name).stem + ".png")
                decoded_bgr = cv.cvtColor(decoded, cv.COLOR_RGB2BGR)
                cv.imwrite(out_path, decoded_bgr)
def validate(cfg, args):
    """Evaluate a checkpoint with horizontal-flip averaging and print metrics.

    For every batch the model is run on the images and on their flip along
    dim 3; the flipped output is flipped back, the two outputs are averaged,
    and the argmax over axis 1 is scored against the labels.

    ``cfg`` supplies the ``data``, ``training`` and ``model`` sections;
    ``args.model_path`` is the checkpoint to load.

    Changes over the previous version: both forward passes run under
    ``torch.no_grad`` (nothing is back-propagated here), the checkpoint is
    mapped onto the selected device so the CPU fallback works, and the
    redundant second ``model.to(device)`` was dropped.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]
    loader = data_loader(
        data_path,
        split=cfg["data"]["val_split"],
        is_transform=True,
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
    )
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader,
                                batch_size=cfg["training"]["batch_size"],
                                num_workers=8)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    checkpoint = torch.load(args.model_path, map_location=device)
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()

    with torch.no_grad():
        for _, (images, labels) in enumerate(valloader):
            images = images.to(device)
            gt = labels.numpy()

            outputs = model(images).data.cpu().numpy()

            flipped_images = torch.flip(images, dims=(3, ))
            outputs_flipped = model(flipped_images)
            outputs_flipped = torch.flip(
                outputs_flipped, dims=(3, )).data.cpu().numpy()

            outputs = (outputs + outputs_flipped) / 2.0
            pred = np.argmax(outputs, axis=1)
            running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    for i in range(n_classes):
        print(i, class_iou[i])
def load_model(self, config, modelfile):
    """Build the network described by ``config`` and wrap it in DataParallel.

    ``config['backbone']`` selects the architecture and ``config['classes']``
    is passed as the class count. Weights are restored from ``modelfile``
    only when that file exists; otherwise the model keeps whatever weights
    ``get_model`` produced. Also resets ``self.dummy_input`` and
    ``self.graph_exported``.

    Returns the DataParallel-wrapped model.
    """
    arch_spec = {'arch': config['backbone']}
    net = get_model(arch_spec, config['classes']).to(self.device)

    if os.path.isfile(modelfile):
        print('loaded model from:', modelfile)
        checkpoint = torch.load(modelfile)
        net.load_state_dict(convert_state_dict(checkpoint["model_state"]))

    gpu_ids = range(torch.cuda.device_count())
    net = torch.nn.DataParallel(net, device_ids=gpu_ids)

    self.dummy_input = None
    self.graph_exported = False
    return net
def test(cfg, args):
    """Predict label-ID masks for the test split and save them as images.

    ``cfg`` supplies the ``data``, ``task``, ``training``, ``model`` and
    ``testing`` sections; ``args.out_path`` is the output directory.
    Each mask is resized to 1024x2048 with nearest-neighbour interpolation
    before saving.

    Changes over the previous version: inference runs under
    ``torch.no_grad`` and the redundant second ``model.to(device)`` is gone.

    NOTE(review): ``np.squeeze(..., axis=0)`` and ``img_path[0]`` assume a
    batch size of one, while the loader uses ``cfg['training']['batch_size']``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'], cfg['task'])
    data_path = cfg['data']['path']
    loader = data_loader(
        data_path,
        split=cfg['data']['test_split'],
        is_transform=True,
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        img_norm=cfg['data']['img_norm'],
    )
    n_classes = loader.n_classes
    testloader = data.DataLoader(loader,
                                 batch_size=cfg['training']['batch_size'],
                                 num_workers=0)

    # Setup Model
    model = get_model(cfg['model'], cfg['task'], n_classes=n_classes).to(device)
    # Keep every tensor on the CPU while loading the checkpoint.
    weights = torch.load(cfg['testing']['trained_model'],
                         map_location=lambda storage, loc: storage)
    model.load_state_dict(weights["model_state"])
    model.eval()

    with torch.no_grad():
        for _, (images, labels, img_path) in tqdm(enumerate(testloader)):
            images = images.to(device)
            outputs = model(images)
            pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)

            # color segmentation mask (currently not written anywhere; call
            # kept in case the decoder has effects not visible from here)
            decoded = loader.decode_segmap_tocolor(pred)
            # segmentation mask of labelIDs for online test
            decoded_labelID = loader.decode_segmap_tolabelId(pred)
            print("Classes found: ", np.unique(decoded_labelID))

            # The file name is a fixed-offset slice of the input path.
            out_file_name = ''.join([img_path[0][39:-16], '*.png'])
            out_path = os.path.join(args.out_path, out_file_name)

            decoded_labelID = m.imresize(decoded_labelID, (1024, 2048),
                                         "nearest", mode="F")
            m.toimage(decoded_labelID,
                      high=np.max(decoded_labelID),
                      low=np.min(decoded_labelID)).save(out_path)
            print("Segmentation Mask Saved at: {}".format(out_path))
def test(args):
    """Run the image+lidar model over the test phase and save road maps.

    The architecture name is the checkpoint file-name prefix before the
    first ``_``. For each sample, channel 1 of the network output is resized
    to the original image size, scaled by 255, capped at 255, and written to
    ``./outputs/results/<prefix>_road_<suffix>.png``, where prefix and suffix
    are the first two ``_``-separated parts of the input file name.
    """
    ckpt_file = os.path.split(args.model_path)[1]
    model_name = ckpt_file[:ckpt_file.find('_')]
    print("Building " + model_name)

    loader_cls = get_loader(args.dataset)
    loader = loader_cls(get_data_path(args.dataset), phase='test')
    im_paths = loader.im_paths()
    n_classes = loader.n_classes
    testloader = data.DataLoader(loader, batch_size=1, num_workers=1,
                                 shuffle=False)

    # Setup Model
    model = get_model(model_name, n_classes)
    model.load_state_dict(torch.load(args.model_path)['model_state'])
    model.eval()
    model.cuda()

    # Run test for KITTI Road dataset
    for idx, (image, tr_image, lidar, tr_lidar) in enumerate(testloader):
        base_name = im_paths[idx].split('/')[-1].split('.')[0]
        name_parts = base_name.split('_')
        print('processing %d-th image' % idx)

        tic = time.time()
        orig_h, orig_w = image.shape[1:3]
        with torch.no_grad():
            tr_image = Variable(tr_image.cuda())
            tr_lidar = Variable(tr_lidar.cuda())
            outputs = model([tr_image, tr_lidar])

        outputs = outputs.cpu().numpy().transpose((2, 3, 1, 0)).squeeze()
        outputs = cv2.resize(outputs, (orig_w, orig_h))
        outputs = outputs[:, :, 1]
        print('Time({:d}'.format(idx) + ') {0:.3f}'.format(time.time() - tic))

        output_fg = outputs * 255.
        output_fg[output_fg > 255] = 255
        output_fg = output_fg.astype(np.uint8)

        result_path = ('./outputs/results/' + name_parts[0] + '_road_'
                       + name_parts[1] + '.png')
        cv2.imwrite(result_path, output_fg)
        print('write to ' + result_path)
def test():
    """Segment one hard-coded SUN RGB-D image with SegNet and plot it.

    Loads the checkpoint into a DataParallel model on the GPU, runs a single
    240x320 input through it, resizes the class map back to the original
    image size, and shows the input next to the prediction.
    """
    # model
    model_name = 'segnet'
    checkpoint_path = '/home/interns/xuan/pre_catkin_ws/src/pre2018/seg_cnn/training/2018-06-19/segnet_sunrgbd_best_model.pkl'

    # dataset
    dataset = 'sunrgbd'
    n_classes = 38
    mean = np.array([104.00699, 116.66877, 122.67892])

    # Setup Model
    model = get_model(model_name, n_classes)
    model = torch.nn.DataParallel(
        model, device_ids=range(torch.cuda.device_count()))
    model.cuda()
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state'])
    model.eval()

    # Setup image
    image_path = '/home/interns/xuan/datasets/SUNRGBD/test/img-000048.jpg'
    color_img = misc.imread(image_path)
    orig_size = color_img.shape[:-1]
    input_size = (240, 320)

    net_input = misc.imresize(color_img, input_size, interp='bicubic')
    net_input = net_input[:, :, ::-1]  # reverse the channel axis
    net_input = net_input.astype(float)
    net_input -= mean
    net_input = net_input / 255.0

    # NHWC -> NCHW
    net_input = np.expand_dims(net_input.transpose(2, 0, 1), 0)
    net_input = torch.from_numpy(net_input).float()
    images = Variable(net_input.cuda(0), volatile=True)

    # do prediction
    outputs = model(images)
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    pred = pred.astype(np.float32)
    # float32 with F mode, resize back to orig_size
    pred = misc.imresize(pred, orig_size, 'nearest', mode='F')

    cmap = color_map()
    # NOTE(review): the colour-decoded mask is computed but the plot below
    # shows `pred`; confirm which one was meant to be displayed.
    decoded = decode_segmap(pred, n_classes, cmap)

    # show images
    plt.subplot(1, 2, 1)
    plt.imshow(color_img)
    plt.title('input')
    plt.subplot(1, 2, 2)
    plt.imshow(pred)
    plt.title('prediction')
    plt.show()
def test(cfg, args):
    """Evaluate a depth model on the test split and print the scores.

    ``cfg`` supplies the ``data``, ``task``, ``training``, ``model`` and
    ``testing`` sections. When the architecture is ``dispnet`` and the task
    is ``depth``, the network output is replaced by its reciprocal before
    scoring.
    """
    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_cfg = cfg['data']
    loader_cls = get_loader(data_cfg['dataset'], cfg['task'])
    data_path = data_cfg['path']
    loader = loader_cls(data_path,
                        split=data_cfg['test_split'],
                        is_transform=True,
                        img_size=(data_cfg['img_rows'], data_cfg['img_cols']),
                        img_norm=data_cfg['img_norm'])
    n_classes = 0
    running_metrics_val = runningScoreDepth(data_cfg['dataset'])
    testloader = data.DataLoader(loader,
                                 batch_size=cfg['training']['batch_size'],
                                 num_workers=0)

    # Load Model (checkpoint tensors stay on the CPU while loading)
    model = get_model(cfg['model'], cfg['task'], n_classes=n_classes).to(device)
    weights = torch.load(cfg['testing']['trained_model'],
                         map_location=lambda storage, loc: storage)
    model.load_state_dict(weights["model_state"])
    model.eval()
    model.to(device)

    invert_output = (cfg['model']['arch'] == "dispnet"
                     and cfg['task'] == "depth")

    with torch.no_grad():
        for _, (images, labels, img_path) in tqdm(enumerate(testloader)):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)  # [batch_size, n_classes, height, width]
            if invert_output:
                outputs = 1 / outputs

            pred = outputs.squeeze(1).data.cpu().numpy()
            gt = labels.data.squeeze(1).cpu().numpy()
            running_metrics_val.update(gt=gt, pred=pred)

    val_result = running_metrics_val.get_scores()
    for name, value in val_result.items():
        print(name, value)
def __init__(self):
    """Set up the PSPNet model, its input size, mean and colour map.

    The dataset name is fixed to ``'ade20k'`` in this method, so only that
    branch runs; the ``'sunrgbd'`` branch is kept as written.
    """
    self.img_width, self.img_height = 640, 480
    print('Setting up CNN model...')

    # Set device
    use_gpu = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if use_gpu else "cpu")  # GPU: device=cuda

    dataset = 'ade20k'
    model_name = 'pspnet'
    model_path = '/home/yubao/data/Dataset/semantic_slam/pspnet_50_ade20k.pth'
    dataset_mean = [104.00699, 116.66877, 122.67892]

    if dataset == 'sunrgbd':  # If use version fine tuned on sunrgbd dataset
        self.n_classes = 38  # Semantic class number
        self.model = get_model(model_name, self.n_classes,
                               version='sunrgbd_res50')
        state = torch.load(model_path, map_location='cuda:0')
        self.model.load_state_dict(state)
        self.cnn_input_size = (321, 321)
        self.mean = np.array(dataset_mean)  # Mean value of dataset
    elif dataset == 'ade20k':
        self.n_classes = 150  # Semantic class number
        self.model = get_model(model_name, self.n_classes, version='ade20k')
        state = torch.load(model_path)
        # Remove 'module' from dictionary keys
        cleaned_state = convert_state_dict(state['model_state'])
        self.model.load_state_dict(cleaned_state)
        self.cnn_input_size = (473, 473)
        self.mean = np.array(dataset_mean)  # Mean value of dataset

    self.model = self.model.to(self.device)
    self.model.eval()
    # Color map for semantic classes
    self.cmap = color_map(N=self.n_classes, normalized=False)
def test(args):
    """Segment one image, optionally via a superpixel mask, and save it.

    Fields read from ``args``: ``model_path`` (the architecture name is the
    file-name prefix before the first ``_``), ``img_path``, ``dataset``,
    ``img_norm``, ``mask_path`` (falsy to skip the mask step) and
    ``out_path``.

    Changes over the previous version: the checkpoint is mapped onto the
    selected device so the CPU fallback can load GPU-saved weights, and the
    forward pass and mask step run under ``torch.no_grad``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True,
                         img_norm=args.img_norm)
    n_classes = loader.n_classes

    img = img[:, :, ::-1]  # reverse the channel axis
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img.astype(float) / 255.0

    # NHWC -> NCHW
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    checkpoint = torch.load(args.model_path, map_location=device)
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    images = img.to(device)
    with torch.no_grad():
        outputs = model(images)

        if args.mask_path:
            print("Read Image Mask from : {}".format(args.mask_path))
            mask = torch.load(args.mask_path)
            mask = mask.to(device)
            outputs = to_super_to_pixels(outputs, mask)

    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    decoded = loader.decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def _load_model(self, cfg):
    """Create the ``icnetBN`` model for Vistas and restore its weights.

    Sets ``self.device`` from ``cfg['device']`` and ``self.loader`` to a
    test-mode Vistas loader rooted at ``cfg['testing']['config_path']``.
    Weights come from ``cfg['testing']['model_path']``.

    Returns the model in eval mode on ``self.device``.
    """
    testing_cfg = cfg['testing']
    self.device = torch.device(cfg['device'])

    loader_cls = get_loader('vistas')
    self.loader = loader_cls(root=testing_cfg['config_path'],
                             is_transform=True,
                             test_mode=True)
    n_classes = self.loader.n_classes

    # Setup Model
    net = get_model({"arch": 'icnetBN'}, n_classes)
    checkpoint = torch.load(testing_cfg['model_path'])
    net.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    net.eval()
    net.to(self.device)
    return net
def infer(args):
    """Segment one image with FCN-8s, print the classifier output, save mask.

    Fields read from ``args``: ``img_path``, ``model_path`` and ``out_path``.
    Everything runs on the CPU.

    NOTE(review): the ``Classifier`` is constructed here without loading any
    saved weights -- confirm this is intended.
    """
    device = "cpu"

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    raw = misc.imread(args.img_path)
    orig_size = raw.shape[:-1]

    arr = misc.imresize(raw, (240, 240))
    arr = arr[:, :, ::-1]  # reverse the channel axis
    arr = arr.astype(np.float64)
    arr -= np.array([104.00699, 116.66877, 122.67892])
    arr = arr.astype(float) / 255.0

    # NHWC -> NCHW
    arr = np.expand_dims(arr.transpose(2, 0, 1), 0)
    img = torch.from_numpy(arr).float()

    # Setup model
    model = get_model({"arch": "fcn8s"}, N_CLASSES,
                      version="mit_sceneparsing_benchmark")
    checkpoint = torch.load(args.model_path, map_location='cpu')
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    # Setup classifier
    classifier = Classifier()
    classifier.eval()
    classifier.to(device)

    images = img.to(device)
    outputs = model(images)
    # outputs = F.avg_pool2d(outputs, 8) # Uncomment to see the real feature map being used.

    pred_raw = outputs.data.max(1)[1]
    pred = np.squeeze(pred_raw.cpu().numpy(), axis=0)

    # Class indices are divided by N_CLASSES before being fed to the classifier.
    classifier_input = pred_raw.type(torch.FloatTensor) / N_CLASSES
    turn_logit = classifier(classifier_input)
    print(turn_logit.detach().cpu().numpy())

    decoded = decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def test(args):
    """Segment one image and save the mask, optionally alpha-blended.

    Fields read from ``args``: ``img_path``, ``dataset``, ``arch``,
    ``model_path``, ``alpha_blend`` and ``out_path``.

    Change over the previous version: the model was moved to CUDA
    unconditionally, so the CPU ``else`` branch could never be reached on a
    machine without a GPU. The DataParallel/CUDA wrapping now happens only
    when CUDA is available, and on CPU the checkpoint tensors are mapped to
    CPU storage while loading.
    """
    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    orig_img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True)
    n_classes = loader.n_classes

    img = orig_img[:, :, ::-1]  # reverse the channel axis
    img = img.astype(np.float64)
    img -= loader.mean
    img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))
    img = img.astype(float) / 255.0

    # NHWC -> NCHW
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    use_cuda = torch.cuda.is_available()
    model = get_model(args.arch, n_classes)
    if use_cuda:
        checkpoint = torch.load(args.model_path)
    else:
        # Keep every tensor on the CPU while loading.
        checkpoint = torch.load(args.model_path,
                                map_location=lambda storage, loc: storage)
    # Weights are loaded before the DataParallel wrap, as before.
    model.load_state_dict(checkpoint['state_dict'])

    if use_cuda:
        model = torch.nn.DataParallel(
            model, device_ids=range(torch.cuda.device_count())).cuda()
        model.cuda(0)
        images = Variable(img.cuda(0))
    else:
        images = Variable(img)
    model.eval()

    outputs = model(images)
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    decoded = loader.decode_segmap(pred)

    if args.alpha_blend:
        orig_img = misc.imresize(orig_img,
                                 (loader.img_size[0], loader.img_size[1]))
        out_img = ALPHA * orig_img + (1 - ALPHA) * decoded
    else:
        out_img = decoded

    print(np.unique(pred))
    misc.imsave(args.out_path, out_img)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def validate(cfg, args):
    """Score a segmentation checkpoint on the validation split.

    ``cfg`` supplies the ``data``, ``task``, ``training`` and ``model``
    sections; ``args.model_path`` is the checkpoint. Prints every entry of
    the overall score dict, then one line per class index with its IoU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_cfg = cfg['data']
    loader_cls = get_loader(data_cfg['dataset'], cfg['task'])
    data_path = data_cfg['path']
    loader = loader_cls(
        data_path,
        split=data_cfg['val_split'],
        is_transform=True,
        img_norm=data_cfg['img_norm'],
        img_size=(data_cfg['img_rows'], data_cfg['img_cols']),
    )
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=0)
    running_metrics = runningScoreSeg(n_classes)

    # Setup Model
    model = get_model(cfg['model'], cfg['task'], n_classes).to(device)
    checkpoint = torch.load(args.model_path)
    model.load_state_dict(checkpoint["model_state"])
    model.to(device)
    model.eval()

    with torch.no_grad():
        for images, labels, _paths in valloader:
            batch = images.to(device)
            logits = model(batch)
            pred = logits.data.max(1)[1].cpu().numpy()
            running_metrics.update(labels.numpy(), pred)

    score, class_iou = running_metrics.get_scores()
    for metric_name, metric_value in score.items():
        print(metric_name, metric_value)
    for class_idx in range(n_classes):
        print(class_idx, class_iou[class_idx])
def get_sem_mask(model_file_name):
    """Return the Cityscapes class-index mask for a fixed sample image.

    Args:
        model_file_name: Checkpoint path; the architecture name is the part
            before its first ``_``.

    Returns:
        ``(pred, img_orig)``: the per-pixel class indices and the image as
        read by ``cv2.imread``.

    Raises:
        ValueError: if the image path does not end in ``png`` or ``jpg``.

    Changes over the previous version: the bare ``except:`` that retried
    ``torch.load`` on the CPU is replaced by always loading to the CPU (the
    model is moved to the target device afterwards), the unreachable
    empty-path fallback was removed, and inference runs under
    ``torch.no_grad``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    img_path = 'results/munich_000009_000019_leftImg8bit.png'
    if not img_path.endswith(('png', 'jpg')):
        raise ValueError('Non PNG or JPG image!')
    print("Read Input Image from : %s" % (img_path))

    img = cv2.imread(img_path)
    img_orig = img

    model_name = model_file_name[:model_file_name.find("_")]

    data_loader = get_loader('cityscapes')
    loader = data_loader(root=None, is_transform=True, test_mode=True)
    n_classes = loader.n_classes

    img = image_preproc(img, loader.img_size)

    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version='cityscapes')
    # Loading to CPU works with or without a GPU present.
    checkpoint = torch.load(model_file_name, map_location='cpu')
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    images = img.to(device)
    with torch.no_grad():
        outputs = model(images)
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    return pred, img_orig
def __init__(self, gen_pcl = True):
    """Set up the PSPNet model and the ROS publishers/subscriber.

    Args:
        gen_pcl (bool): whether to generate a point cloud. When true, the
            frame id and camera intrinsics are read from the ROS parameter
            server.

    NOTE(review): the source this was reconstructed from had lost its
    indentation, so the extent of the ``if gen_pcl:`` body is an
    assumption -- confirm whether the publishers and the image subscriber
    belong inside it.
    """
    # Get image size
    self.img_width = rospy.get_param('/camera/width')
    self.img_height = rospy.get_param('/camera/height')

    # Set up CNN
    print('Setting up CNN model...')
    # Set device
    cuda_ok = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if cuda_ok else "cpu")
    # Get dataset (read from the parameter server; not used further here)
    dataset = rospy.get_param('/semantic_pcl/dataset')

    # Setup model
    model_name = 'pspnet'
    model_path = rospy.get_param('/semantic_pcl/model_path')
    self.n_classes = 38  # Semantic class number
    self.model = get_model(model_name, self.n_classes,
                           version='sunrgbd_res50')
    state = torch.load(model_path, map_location='cuda:0')
    self.model.load_state_dict(state)
    self.cnn_input_size = (321, 321)
    self.mean = np.array([104.00699, 116.66877, 122.67892])  # Mean value of dataset
    self.model = self.model.to(self.device)
    self.model.eval()
    # Color map for semantic classes
    self.cmap = color_map(N=self.n_classes, normalized=False)

    # Set up ROS
    print('Setting up ROS...')
    # CvBridge to transform ROS Image message to OpenCV image
    self.bridge = CvBridge()

    if gen_pcl:
        # Point cloud frame id
        frame_id = rospy.get_param('/semantic_pcl/frame_id')
        # Camera intrinsic matrix
        fx = rospy.get_param('/camera/fx')
        fy = rospy.get_param('/camera/fy')
        cx = rospy.get_param('/camera/cx')
        cy = rospy.get_param('/camera/cy')
        print('get Camera intrinsic matrix')
        intrinsic = np.matrix([[fx, 0, cx], [0, fy, cy], [0, 0, 1]],
                              dtype=np.float32)

        self.semlabel_pub = rospy.Publisher(
            "/semantic_image/semantic_label", Image, queue_size=1)
        self.semcolor_pub = rospy.Publisher(
            "/semantic_image/semantic_color", Image, queue_size=1)

        color_topic = rospy.get_param('/semantic_pcl/color_image_topic')
        self.image_sub = rospy.Subscriber(color_topic, Image,
                                          self.color_callback,
                                          queue_size=1,
                                          buff_size=30*480*640)
    print('Ready.')
def init_model(args):
    """Build the ``icboard`` loader and a restored ``hardnet`` model.

    Fields read from ``args``: ``size`` (a string that is evaluated to get
    the image size) and ``model_path``.

    Returns:
        ``(device, model, loader)`` with the model in eval mode on ``device``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # SECURITY NOTE(review): `eval` (presumably the builtin) runs whatever
    # expression is in args.size. Do not feed it untrusted input; consider
    # ast.literal_eval if only literal tuples are expected.
    img_size = eval(args.size)

    loader_cls = get_loader("icboard")
    loader = loader_cls(root=None, is_transform=True, img_size=img_size,
                        test_mode=True)
    n_classes = loader.n_classes

    # Setup Model
    model = get_model({"arch": "hardnet"}, n_classes)
    checkpoint = torch.load(args.model_path, map_location=device)
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    return device, model, loader
def test(args, cfg):
    """Run the two-image model over the test split and save colour masks.

    The model is called with a source and a destination image per sample;
    each prediction is decoded and written to
    ``test_out/changenet_change_det/<image stem>.png``.

    Fields read from ``args``: ``img_norm`` and ``model_path``. ``cfg``
    supplies the ``data``, ``training`` and ``model`` sections.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_cfg = cfg['data']
    loader_cls = get_loader(data_cfg['dataset'])
    data_path = get_data_path(data_cfg['dataset'], config_file=cfg)
    # This loader instance is used for n_classes and for decoding masks.
    loader = loader_cls(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    t_loader = loader_cls(
        data_path,
        is_transform=True,
        split='test',
        img_size=(data_cfg['img_rows'], data_cfg['img_cols']),
    )
    testloader = data.DataLoader(t_loader,
                                 batch_size=1,
                                 num_workers=cfg['training']['n_workers'])

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    checkpoint = torch.load(args.model_path)
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    with torch.no_grad():
        for _, (img_path, image_src, image_dst) in tqdm(enumerate(testloader)):
            img_name = img_path[0]
            src = image_src.to(device)
            dst = image_dst.to(device)

            outputs = model(src, dst)
            class_map = outputs.data.max(1)[1].cpu().numpy()
            pred = np.squeeze(class_map, axis=0)
            decoded = loader.decode_segmap(pred)

            out_path = ("test_out/changenet_change_det/"
                        + Path(img_name).stem + ".png")
            decoded_bgr = cv.cvtColor(decoded, cv.COLOR_RGB2BGR)
            cv.imwrite(out_path, decoded_bgr)
def test(cfg):
    """Segment every file in a folder with ``icnetBN`` and save colour masks.

    ``cfg['device']`` selects the device; ``cfg['testing']`` supplies
    ``config_path``, ``model_path``, ``img_fold``, ``img_rows``,
    ``img_cols`` and ``output_fold``. Each result is written to
    ``<output_fold>/mask_<name before first dot>.png``.

    Change over the previous version: the per-image forward passes run under
    ``torch.no_grad`` so no autograd graph is built during inference.
    """
    device = torch.device(cfg['device'])
    testing_cfg = cfg['testing']

    data_loader = get_loader('vistas')
    loader = data_loader(root=testing_cfg['config_path'],
                         is_transform=True,
                         test_mode=True)
    n_classes = loader.n_classes

    # Setup Model
    model_dict = {"arch": 'icnetBN'}
    model = get_model(model_dict, n_classes)
    state = convert_state_dict(
        torch.load(testing_cfg['model_path'])["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Loop-invariant network input size.
    input_size = (testing_cfg['img_rows'], testing_cfg['img_cols'])

    with torch.no_grad():
        for img_name in os.listdir(testing_cfg['img_fold']):
            img_path = os.path.join(testing_cfg['img_fold'], img_name)
            img = misc.imread(img_path)
            orig_size = img.shape[:-1]

            img = misc.imresize(img, input_size)
            img = img.astype(np.float64)
            img = img.astype(float) / 255.0

            # NHWC -> NCHW
            img = img.transpose(2, 0, 1)
            img = np.expand_dims(img, 0)
            img = torch.from_numpy(img).float()
            img = img.to(device)

            outputs = model(img)
            outputs = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
            outputs = outputs.astype(np.float32)
            # float32 with F mode, resize back to orig_size
            outputs = misc.imresize(outputs, orig_size, "nearest", mode="F")

            decoded = loader.decode_segmap(outputs)
            output_path = os.path.join(testing_cfg['output_fold'],
                                       'mask_%s.png' % img_name.split('.')[0])
            misc.imsave(output_path, decoded)
def validate(args):
    """Score a checkpoint on one dataset split and print per-class IoU.

    Fields read from ``args``: ``gpus`` (written to
    ``CUDA_VISIBLE_DEVICES``), ``dataset``, ``split``, ``img_rows``,
    ``img_cols``, ``batch_size``, ``arch`` and ``model_path``. The
    checkpoint must contain ``model_state`` and ``epoch``. Class names come
    from the module-level ``classes`` sequence.

    Change over the previous version: the model is moved to the GPU once
    before the loop instead of on every batch.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, split=args.split, is_transform=True,
                         img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=args.batch_size,
                                num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(args.arch, n_classes)
    checkpoint = torch.load(args.model_path)
    state = convert_state_dict(checkpoint['model_state'])
    model.load_state_dict(state)
    print("Loaded checkpoint '{}' (epoch {})".format(args.model_path,
                                                     checkpoint['epoch']))
    model.eval()
    model.cuda()  # loop-invariant: do it once

    for _, (images, labels) in tqdm(enumerate(valloader)):
        # volatile=True is kept for the legacy Variable API this block uses.
        images = Variable(images.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True)

        outputs = model(images)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.data.cpu().numpy()
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    for i in range(n_classes):
        print(i, classes[i], class_iou[i])
    # Same IoUs on a single tab-separated line.
    print('\t'.join([str(class_iou[i]) for i in range(n_classes)]))
def run():
    """Segment a fixed crop of every frame, overlay the result and show it.

    For each frame from the project ``DataLoader`` a 304x1085 crop at
    (x=550, y=680) is segmented with a two-class model, post-processed by
    ``overlay_mask`` and ``foo``, pasted back into the frame, saved, and
    displayed. Pressing ``q`` releases the loader and stops.

    Changes over the previous version: inference runs under
    ``torch.no_grad``, the checkpoint is mapped onto the selected device so
    the CPU fallback works, and the crop geometry is named once instead of
    being repeated as literals.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    mean = np.array([104.00699, 116.66877, 122.67892])

    dataloader = DataLoader('E:/Autopilot/input/vc', 'E:/Autopilot/output/vc')

    model_path = "E:/Autopilot/pytorch-semseg-master/runs/39060/fcn8s_camvid_best_model.pkl"
    model_file_name = os.path.split(model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]

    model_dict = {"arch": model_name}
    model = get_model(model_dict, 2, version='camvid')
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    # Region of interest inside each frame (previously tried: x=520, y=770).
    x, y = 550, 680
    crop_h, crop_w = 304, 1085

    buffer = []
    with torch.no_grad():
        for img0, _, _, _, frame in dataloader:
            if frame == 1:
                # Frame number 1 resets the state carried between frames.
                buffer = []

            crop = img0[y:y + crop_h, x:x + crop_w]
            img = preproc_img(crop, mean)
            img = img.to(device)

            outputs = model(img)
            pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
            decoded = decode_segmap(pred)

            res = overlay_mask(crop, decoded)
            res, buffer = foo(crop, res, decoded, buffer, x_l=455, y_l=180)
            img0[y:y + crop_h, x:x + crop_w] = res

            dataloader.save_results(img0)
            cv2.imshow('123', res)
            if cv2.waitKey(1) == ord('q'):
                dataloader.release()
                break
def load_model_and_preprocess(cfg, args, n_classes, device):
    """Build the configured model, load its weights and put it in eval mode.

    The checkpoint is read from ``args.model_path`` when that path exists;
    otherwise from ``cfg['training']['resume']`` joined onto ``cfg['logdir']``.
    As a side effect, ``args.steps`` is set from the config when the
    architecture name contains ``'NoParamShare'``.

    Args:
        cfg: Configuration mapping; reads ``model``, ``logdir`` and
            ``training.resume``.
        args: Options object; reads ``model_path`` (and may receive ``steps``).
        n_classes: Number of classes forwarded to ``get_model``.
        device: Device the model is moved to.

    Returns:
        Tuple ``(model, model_path)`` with the path actually loaded.
    """
    model_cfg = cfg['model']
    if 'NoParamShare' in model_cfg['arch']:
        args.steps = model_cfg['steps']

    net = get_model(model_cfg, n_classes, args).to(device)

    model_path = (
        args.model_path
        if os.path.exists(args.model_path)
        else pjoin(cfg['logdir'], cfg['training']['resume'])
    )

    def _identity_location(storage, loc):
        # Hand back the storage untouched instead of remapping it.
        return storage

    checkpoint = torch.load(model_path, map_location=_identity_location)
    net.load_state_dict(convert_state_dict(checkpoint["model_state"]))

    net.eval()
    net.to(device)
    return net, model_path
def loadEncoder():
    """Create the RGB DeepLab model and load matching pretrained weights.

    The model is built on CUDA, wrapped in ``DataParallel`` over every
    visible GPU, and loaded non-strictly with those checkpoint entries whose
    keys also exist in the model. Each such tensor is resized in place to
    the shape of the corresponding model entry before loading.

    Returns:
        The ``DataParallel``-wrapped model.
    """
    net = get_model(
        name='DeepLab',
        modality='rgb',
        n_classes=14,
        input_size=(128, 128),
        in_channels=3,
        mcdo_passes=1,
        dropoutP=0,
        full_mcdo=0,
        device='cuda',
        temperatureScaling=False,
        freeze_seg=True,
        freeze_temp=True,
    )
    net = net.cuda()
    gpu_ids = range(torch.cuda.device_count())
    net = torch.nn.DataParallel(net, device_ids=gpu_ids)

    model_pkl = '/home/wcheung8/pytorch-semseg/models/DeepLab/rgb_DeepLab/rgb_DeepLab_airsim_T000.pkl'
    saved_state = torch.load(model_pkl)['model_state']
    current_state = net.state_dict()

    # 1. keep only the keys the model knows, resized to the model's shapes
    matched = {}
    for key, tensor in saved_state.items():
        if key in current_state:
            matched[key] = tensor.resize_(current_state[key].shape)

    # 2. overwrite entries in the existing state dict
    current_state.update(matched)

    # 3. load the filtered state dict
    net.load_state_dict(matched, strict=False)
    return net
def train(cfg, writer, logger):
    """Train a segmentation model as described by ``cfg``.

    Runs iteration-based training, logs the training loss every
    ``print_interval`` iterations, validates every ``val_interval``
    iterations (and at the final iteration), and saves a checkpoint whenever
    the validation mean IoU is at least as good as the best seen so far.

    Args:
        cfg: Configuration mapping. Reads ``seed`` (default 1337), ``data``
            (``dataset``, ``path``, ``sbd_path``, ``train_split``,
            ``val_split``, ``img_rows``, ``img_cols``), ``model`` and
            ``training`` (``augmentations``, ``batch_size``, ``n_workers``,
            ``optimizer``, ``lr_schedule``, ``resume``, ``train_iters``,
            ``print_interval``, ``val_interval``).
        writer: Summary writer; ``add_scalar`` and
            ``file_writer.get_logdir()`` are used.
        logger: Logger used for progress messages.
    """
    # Setup seeds
    seed = cfg.get("seed", 1337)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Augmentations
    augmentations = cfg["training"].get("augmentations", None)
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    t_loader = data_loader(
        data_path,
        sbd_path=cfg["data"]["sbd_path"],
        is_transform=True,
        split=cfg["data"]["train_split"],
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
        augmentations=data_aug,
    )

    v_loader = data_loader(
        data_path,
        sbd_path=cfg["data"]["sbd_path"],
        is_transform=True,
        split=cfg["data"]["val_split"],
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
    )

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(
        t_loader,
        batch_size=cfg["training"]["batch_size"],
        num_workers=cfg["training"]["n_workers"],
        shuffle=True,
    )

    valloader = data.DataLoader(v_loader,
                                batch_size=cfg["training"]["batch_size"],
                                num_workers=cfg["training"]["n_workers"])

    # Setup Metrics
    running_metrics_val = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    # Everything except "name" is forwarded as an optimizer keyword argument.
    optimizer_params = {
        k: v
        for k, v in cfg["training"]["optimizer"].items() if k != "name"
    }

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger.info("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, cfg["training"]["lr_schedule"])

    loss_fn = get_loss_function(cfg)
    logger.info("Using loss {}".format(loss_fn))

    start_iter = 0
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(cfg["training"]["resume"]):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    cfg["training"]["resume"]))
            checkpoint = torch.load(cfg["training"]["resume"])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            # The "epoch" field of the checkpoint holds an iteration count.
            start_iter = checkpoint["epoch"]
            logger.info("Loaded checkpoint '{}' (iter {})".format(
                cfg["training"]["resume"], checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(
                cfg["training"]["resume"]))

    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    best_iou = -100.0
    i = start_iter
    flag = True

    while i <= cfg["training"]["train_iters"] and flag:
        for (images, labels) in trainloader:
            i += 1
            start_ts = time.time()
            model.train()
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            loss = loss_fn(input=outputs, target=labels)

            loss.backward()
            optimizer.step()
            # The scheduler is stepped after the optimizer; stepping it first
            # skips the first value of the learning-rate schedule on
            # PyTorch >= 1.1.
            scheduler.step()

            time_meter.update(time.time() - start_ts)

            if (i + 1) % cfg["training"]["print_interval"] == 0:
                fmt_str = "Iter [{:d}/{:d}] Loss: {:.4f} Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    i + 1,
                    cfg["training"]["train_iters"],
                    loss.item(),
                    time_meter.avg / cfg["training"]["batch_size"],
                )

                print(print_str)
                logger.info(print_str)
                writer.add_scalar("loss/train_loss", loss.item(), i + 1)
                time_meter.reset()

            if (i + 1) % cfg["training"]["val_interval"] == 0 or (
                    i + 1) == cfg["training"]["train_iters"]:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val,
                                labels_val) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)

                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)

                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()

                        running_metrics_val.update(gt, pred)
                        val_loss_meter.update(val_loss.item())

                writer.add_scalar("loss/val_loss", val_loss_meter.avg, i + 1)
                logger.info("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))

                score, class_iou = running_metrics_val.get_scores()
                for k, v in score.items():
                    print(k, v)
                    logger.info("{}: {}".format(k, v))
                    writer.add_scalar("val_metrics/{}".format(k), v, i + 1)

                for k, v in class_iou.items():
                    logger.info("{}: {}".format(k, v))
                    writer.add_scalar("val_metrics/cls_{}".format(k), v, i + 1)

                # Start the next validation round from clean accumulators.
                val_loss_meter.reset()
                running_metrics_val.reset()

                if score["Mean IoU : \t"] >= best_iou:
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join(
                        writer.file_writer.get_logdir(),
                        "{}_{}_best_model.pkl".format(cfg["model"]["arch"],
                                                      cfg["data"]["dataset"]),
                    )
                    torch.save(state, save_path)

            if (i + 1) == cfg["training"]["train_iters"]:
                # Leave the inner loop and stop the outer while loop as well.
                flag = False
                break
def test(args):
    """Run semantic + instance-embedding inference on one image and save results.

    Written to disk (all names derived from ``args.out_path``):
      * ``args.out_path``: colour-decoded semantic prediction
      * ``"inst_" + out_path``: 3-component PCA of the embedding, all pixels
      * ``"inst_single_" + out_path``: PCA fitted on the pixels of train id 17
      * ``"inst_seg_map_" + out_path``: MeanShift clusters of those pixels
      * ``"inst_seg_map_overlayed_" + out_path``: clusters over the input image
      * ``out_path[:-4] + "_drf.png"``: dense-CRF mask, only when ``args.dcrf``

    Args:
        args: Parsed options. Reads ``model_path``, ``img_path``, ``dataset``,
            ``img_norm``, ``origianl_icnet_semantic_pred`` (sic), ``dcrf`` and
            ``out_path``.

    NOTE(review): the prefixes are prepended to ``args.out_path`` as plain
    strings, so this presumably expects a bare file name without a directory
    part. Confirm with callers.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]
    # NOTE(review): the name parsed from the file name is immediately
    # overwritten by this hard-coded architecture.
    model_name = 'icnet_is_wp'
    # import ipdb
    # ipdb.set_trace()

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = imageio.imread(args.img_path)
    # RGBA copy of the input, used for the final overlay.
    original_img = Image.fromarray(img).convert('RGBA')

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None,
                         is_transform=True,
                         img_norm=args.img_norm,
                         test_mode=True)
    n_classes = loader.n_classes

    # Only consumed by the dense-CRF branch below.
    resized_img = misc.imresize(img,
                                (loader.img_size[0], loader.img_size[1]),
                                interp="bicubic")

    orig_size = img.shape[:-1]
    if model_name in ["pspnet", "icnet", "icnetBN", "icnet_is", "icnet_is_wp"]:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(
            img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))

    # Reverse the channel axis (presumably RGB -> BGR) and subtract the
    # loader's mean.
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= loader.mean
    # if args.img_norm:
    #     img = img.astype(float) / 255.0

    # NHWC -> NCHW
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()
    images = img.to(device)

    if args.origianl_icnet_semantic_pred:
        # Take the semantic output from a separate pretrained ICNet; the
        # model loaded further down then only supplies the embedding.
        model_dict = {"arch": "icnet"}
        model = get_model(model_dict, n_classes, version=args.dataset)
        state = convert_state_dict(
            torch.load("pretrained_models/icnetBN_cityscapes_trainval_90k.pth")
            ["model_state"])
        # state = torch.load(args.model_path)["model_state"]
        model.load_state_dict(state)
        model.eval()
        model.to(device)
        outputs = model(images)

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    model = FullModel(model, None)

    state = convert_state_dict(torch.load(args.model_path)["model_state"])
    # state = torch.load(args.model_path)["model_state"]
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    if args.origianl_icnet_semantic_pred:
        # Keep `outputs` from the ICNet above; use only the second output.
        _, outputs_inst = model.model(images)
    else:
        outputs, outputs_inst = model.model(images)

    if args.dcrf:
        # NOTE(review): no softmax is applied to `outputs` in this function
        # before taking -log; confirm the model already returns probabilities.
        unary = outputs.data.cpu().numpy()
        unary = np.squeeze(unary, 0)
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, c = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + "_drf.png"
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    # argmax over the class dimension, batch dimension dropped
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    # Unresized copy, used below to index the embedding pixels.
    pred_original = np.copy(pred)
    # 'icnet_is_wp' (the hard-coded model_name) is not in this list, so the
    # resize back to the input size is skipped in practice.
    if model_name in ["pspnet", "icnet", "icnetBN", "icnet_is"]:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, "nearest", mode="F")

    # Train id of the single semantic class whose instances are separated.
    interested_semantic_class_train_id = 17

    # Embedding of the first batch element, channels moved last: (h, w, c).
    outputs_inst = outputs_inst.cpu().detach().numpy()
    outputs_inst = outputs_inst[0, ...]
    outputs_inst = outputs_inst.transpose((1, 2, 0))
    h, w, c = outputs_inst.shape

    # Visualisation 1: project every pixel's embedding onto 3 PCA components
    # and rescale each component to [0, 1].
    outputs_inst_transformed = np.copy(outputs_inst.reshape((h * w, c)))
    pca = sklearnPCA(n_components=3)
    pca.fit(outputs_inst_transformed)
    outputs_inst_transformed = pca.transform(outputs_inst_transformed)
    outputs_inst_transformed -= outputs_inst_transformed.min(axis=0)
    outputs_inst_transformed /= outputs_inst_transformed.max(axis=0)
    outputs_inst_img = outputs_inst_transformed.reshape((h, w, 3))
    outputs_inst_img = (outputs_inst_img * 255).astype(int)

    decoded = loader.decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    imageio.imsave(args.out_path, decoded)
    imageio.imsave("inst_" + args.out_path, outputs_inst_img)
    print("Segmentation Mask Saved at: {}".format(args.out_path))

    # Visualisation 2: fit the PCA only on pixels predicted as the class of
    # interest, transform all pixels, then zero out every other pixel.
    # NOTE(review): the reshape assumes `pred_original` has the same h*w as
    # the embedding map; verify.
    outputs_inst_transformed_single = np.copy(outputs_inst.reshape((h * w, c)))
    pred_transformed = pred_original.reshape((h * w))
    pca.fit(outputs_inst_transformed_single[
        pred_transformed == interested_semantic_class_train_id, :])
    outputs_inst_transformed_single = pca.transform(
        outputs_inst_transformed_single)
    outputs_inst_transformed_single -= outputs_inst_transformed_single.min(
        axis=0)
    outputs_inst_transformed_single /= outputs_inst_transformed_single.max(
        axis=0)
    outputs_inst_transformed_single[
        pred_transformed != interested_semantic_class_train_id, :] = 0
    outputs_inst_single_img = outputs_inst_transformed_single.reshape(
        (h, w, 3))
    outputs_inst_single_img = Image.fromarray(
        (outputs_inst_single_img * 255).astype(np.uint8))
    outputs_inst_single_img.save("inst_single_" + args.out_path)

    # Instance separation: MeanShift on the raw (un-projected) embeddings of
    # the pixels predicted as the class of interest.
    outputs_inst_transformed_single = np.copy(outputs_inst.reshape((h * w, c)))
    bandwidth = estimate_bandwidth(outputs_inst_transformed_single[
        pred_transformed == interested_semantic_class_train_id, :],
                                   quantile=0.1,
                                   n_samples=1000,
                                   n_jobs=12)
    print(bandwidth)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, n_jobs=12)
    ms.fit(outputs_inst_transformed_single[
        pred_transformed == interested_semantic_class_train_id, :])
    clustering_label = ms.labels_

    # 0 is kept for pixels outside the class; cluster ids are shifted by +1.
    inst_segment_map_single = np.zeros((h * w))
    inst_segment_map_single[
        pred_transformed ==
        interested_semantic_class_train_id] = clustering_label + 1
    inst_segment_map_single = inst_segment_map_single.reshape(h, w)

    cmap = plt.cm.jet
    norm = plt.Normalize(vmin=inst_segment_map_single.min(),
                         vmax=inst_segment_map_single.max())
    # import ipdb
    # ipdb.set_trace()

    # map the normalized data to colors
    # image is now RGBA (512x512x4)
    inst_segment_map_single_image = cmap(norm(inst_segment_map_single))
    # Pixels outside the class of interest are painted opaque black.
    inst_segment_map_single_image[inst_segment_map_single == 0] = [0, 0, 0, 1]
    inst_segment_map_single_image = Image.fromarray(
        (inst_segment_map_single_image * 255).astype(np.uint8))

    # save the image
    inst_segment_map_single_image.save('inst_seg_map_' + args.out_path)
    # import ipdb
    # ipdb.set_trace()

    # Resize the input to the cluster map and blend the map over it at
    # alpha 128.
    original_img = original_img.resize(inst_segment_map_single_image.size)
    inst_segment_map_single_image.putalpha(128)
    overlayed_image = Image.alpha_composite(original_img,
                                            inst_segment_map_single_image)
    overlayed_image.save('inst_seg_map_overlayed_' + args.out_path)
def test(args):
    """Segment a single image with a saved model and write the decoded mask.

    The colour-decoded prediction is written to ``args.out_path``. When
    ``args.dcrf`` equals the string ``"True"``, a dense-CRF refined mask is
    additionally written to ``args.out_path[:-4] + '_drf.png'``.

    Runs on the first CUDA device when one is available and on the CPU
    otherwise.

    Args:
        args: Parsed options. Reads ``img_path``, ``dataset``, ``model_path``,
            ``dcrf`` and ``out_path``. The architecture name is the part of
            the checkpoint *file name* before the first underscore.
    """
    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True)
    n_classes = loader.n_classes

    # Only consumed by the dense-CRF branch below.
    resized_img = misc.imresize(img,
                                (loader.img_size[0], loader.img_size[1]),
                                interp='bicubic')

    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= loader.mean
    img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))
    img = img.astype(float) / 255.0
    # HWC -> CHW, then add the batch dimension
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    # Choose the device before anything is moved, so CPU-only hosts work too.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Parse the architecture from the file name only, so a directory part in
    # model_path cannot leak into the model name.
    model_file_name = os.path.basename(args.model_path)
    model = get_model(model_file_name[:model_file_name.find('_')], n_classes)
    state = convert_state_dict(
        torch.load(args.model_path, map_location=device)['model_state'])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # `volatile=True` is ignored by PyTorch >= 0.4; no_grad() is what
    # actually disables autograd for inference.
    with torch.no_grad():
        outputs = F.softmax(model(img.to(device)), dim=1)

    if args.dcrf == "True":
        # Unary energies are the negative log of the softmax probabilities.
        unary = outputs.data.cpu().numpy()
        unary = np.squeeze(unary, 0)
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, c = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + '_drf.png'
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    # argmax over the class dimension, batch dimension dropped
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    decoded = loader.decode_segmap(pred)
    print('Classes found: ', np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def train(args):
    """Train a segmentation model for ``args.n_epoch`` epochs on CUDA.

    After every epoch the model is evaluated on the ``'val'`` split; when
    the mean IoU is at least as good as the best seen so far, a checkpoint is
    written to ``"{arch}_{dataset}_best_model.pkl"`` in the working directory.

    Args:
        args: Parsed options. Reads ``dataset``, ``img_rows``, ``img_cols``,
            ``batch_size``, ``visdom``, ``arch``, ``l_rate``, ``resume`` and
            ``n_epoch``.
    """
    # Setup Augmentations
    data_aug = Compose([RandomRotate(10), RandomHorizontallyFlip()])

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True,
                           img_size=(args.img_rows, args.img_cols),
                           augmentations=data_aug)
    v_loader = data_loader(data_path, is_transform=True, split='val',
                           img_size=(args.img_rows, args.img_cols))

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=8, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=8)

    # Setup Metrics
    running_metrics = runningScore(n_classes)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom()

        loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches',
                                         ylabel='Loss',
                                         title='Training Loss',
                                         legend=['Loss']))

    # Setup Model
    model = get_model(args.arch, n_classes)

    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    model.cuda()

    # Check if model has custom optimizer / loss
    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate,
                                    momentum=0.99, weight_decay=5e-4)

    if hasattr(model.module, 'loss'):
        print('Using custom loss')
        loss_fn = model.module.loss
    else:
        loss_fn = cross_entropy2d

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    best_iou = -100.0
    for epoch in range(args.n_epoch):
        model.train()
        for i, (images, labels) in enumerate(trainloader):
            images = images.cuda()
            labels = labels.cuda()

            optimizer.zero_grad()
            outputs = model(images)

            loss = loss_fn(input=outputs, target=labels)

            loss.backward()
            optimizer.step()

            # `loss.data[0]` raises on 0-dim tensors (PyTorch >= 0.5);
            # `.item()` is the supported way to read the scalar.
            if args.visdom:
                vis.line(
                    X=torch.ones((1, 1)).cpu() * i,
                    Y=torch.Tensor([loss.item()]).unsqueeze(0).cpu(),
                    win=loss_window,
                    update='append')

            if (i + 1) % 20 == 0:
                print("Epoch [%d/%d] Loss: %.4f" % (epoch + 1, args.n_epoch, loss.item()))

        model.eval()
        # `volatile=True` is ignored by PyTorch >= 0.4; disable autograd
        # explicitly so validation does not build graphs.
        with torch.no_grad():
            for i_val, (images_val, labels_val) in tqdm(enumerate(valloader)):
                images_val = images_val.cuda()
                labels_val = labels_val.cuda()

                outputs = model(images_val)
                pred = outputs.data.max(1)[1].cpu().numpy()
                gt = labels_val.data.cpu().numpy()
                running_metrics.update(gt, pred)

        score, class_iou = running_metrics.get_scores()
        for k, v in score.items():
            print(k, v)
        # Metrics are accumulated per epoch.
        running_metrics.reset()

        if score['Mean IoU : \t'] >= best_iou:
            best_iou = score['Mean IoU : \t']
            state = {'epoch': epoch + 1,
                     'model_state': model.state_dict(),
                     'optimizer_state': optimizer.state_dict(), }
            torch.save(state, "{}_{}_best_model.pkl".format(args.arch, args.dataset))