def inicializar_segsem():
    """Load the HarDNet ADE20K segmentation model into module globals.

    Sets the module-level names ``loader``, ``device`` and ``model`` and
    prints how long the setup took. The checkpoint path is hard-coded.
    """
    global loader
    global device
    global model

    print("Loading Semantic Segmentation Model:")
    start = time.time()

    # Fall back to the CPU instead of crashing on hosts without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_name = "hardnet"

    data_loader = get_loader("ade20k")
    loader = data_loader(root=None,
                         is_transform=True,
                         img_norm=True,
                         test_mode=True)
    n_classes = loader.n_classes

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version="ade20k")
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    checkpoint = torch.load(
        "/home/socialab/FCHarDNet/runs/config./cur/hardnet_ade20k_best_model.pkl",
        map_location=device,
    )
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    end = time.time()
    print(" (time): " + str(end - start))
def test(args):
    """Segment one image and save the raw class-index mask.

    Args:
        args: namespace providing ``model_path``, ``img_path``, ``dataset``,
            ``img_norm`` and ``out_path``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The architecture name is the checkpoint file name up to the first "_".
    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]
    odd_size_model = model_name in ["pspnet", "icnet", "icnetBN"]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None,
                         is_transform=True,
                         img_norm=args.img_norm,
                         test_mode=True)
    n_classes = loader.n_classes

    orig_size = img.shape[:-1]
    if odd_size_model:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(
            img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))

    # Reverse the channel axis (presumably RGB -> BGR; confirm against loader).
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img.astype(float) / 255.0

    # HWC -> CHW, then add the batch axis
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    # Inference only: do not record an autograd graph.
    with torch.no_grad():
        outputs = model(images)
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)

    if odd_size_model:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, "nearest", mode="F")

    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, pred.astype('uint8'))
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def train(args):
    """Train and/or evaluate the classifier on top of an FCN8s feature model.

    Args:
        args: namespace providing ``feature_model_path``,
            ``classifier_model_path``, ``train_csv_path``, ``batch_size``,
            ``num_epoch``, ``output_model_path`` and ``test_csv_path``.
            Training and testing are each skipped when their CSV path is None.
    """
    device = "cpu"

    # Setup model (the feature network is put in eval mode)
    model = get_model({"arch":"fcn8s"}, N_CLASSES, version="mit_sceneparsing_benchmark")
    feature_checkpoint = torch.load(args.feature_model_path, map_location='cpu')
    model.load_state_dict(convert_state_dict(feature_checkpoint["model_state"]))
    model.eval()
    model.to(device)

    # Setup classifier, optionally warm-started from a saved state dict
    classifier = Classifier()
    if args.classifier_model_path is not None:
        classifier_state = torch.load(args.classifier_model_path, map_location='cpu')
        classifier.load_state_dict(classifier_state)
    classifier.to(device)

    # NOTE(review): momentum=True is numerically momentum=1.0, which is
    # unlikely to be intended (0.9 is customary) - confirm before changing.
    optimizer = optim.SGD(classifier.parameters(), lr=0.001, momentum=True)

    if args.train_csv_path is not None:
        print("Read training csv file from : {}".format(args.train_csv_path))
        train_data = read_samples(args.train_csv_path, args.batch_size)
        for _epoch in range(args.num_epoch):
            for img, label in train_data:
                train_step(model, classifier, optimizer, img, label)
            # NOTE(review): the original layout was lost; saving once per
            # epoch is assumed here - confirm against the upstream file.
            torch.save(classifier.state_dict(), args.output_model_path)

    if args.test_csv_path is not None:
        classifier.eval()
        print("Read testing csv file from : {}".format(args.test_csv_path))
        test_data = read_samples(args.test_csv_path, 999)
        first_batch = test_data[0]
        # `eval` here is whatever the module binds to that name, called with
        # four arguments as in the original.
        eval(model, classifier, first_batch[0], first_batch[1])
def validate(args):
    """Run a model over a dataset split on the GPU and print its scores.

    Args:
        args: namespace providing ``dataset``, ``split``, ``img_rows``,
            ``img_cols``, ``batch_size`` and ``model_path``.
    """
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         is_transform=True,
                         img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader,
                                batch_size=args.batch_size,
                                num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model (architecture name = model_path up to the first "_")
    model = get_model(args.model_path[:args.model_path.find('_')], n_classes)
    state = convert_state_dict(torch.load(args.model_path)['model_state'])
    model.load_state_dict(state)
    model.eval()
    # Moving the model once is enough; it used to happen on every batch.
    model.cuda()

    # `volatile=True` has been a no-op since PyTorch 0.4, so gradients were
    # silently tracked; torch.no_grad() is the supported replacement.
    with torch.no_grad():
        for i, (images, labels) in tqdm(enumerate(valloader)):
            outputs = model(images.cuda())
            pred = outputs.data.max(1)[1].cpu().numpy()
            gt = labels.numpy()
            running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    for i in range(n_classes):
        print(i, class_iou[i])
def test(args, cfg):
    """Segment every ``*.png`` in a folder and print a confidence score each.

    The confidence score is the mean over all pixels of the maximum softmax
    probability. Colour-decoded masks are written to
    ``test_out/test_confidence/out/``.

    Args:
        args: namespace providing ``model_path``, ``img_path`` (a directory),
            ``dataset`` and ``img_norm``.
        cfg: configuration mapping; ``cfg['model']`` selects the architecture.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    image_files = natsort.natsorted(list(Path(args.img_path).glob("*.png")),
                                    alg=natsort.PATH)
    image_paths = [str(p) for p in image_files]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset, config_file=cfg)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    out_dir = "test_out/test_confidence/out/"
    # cv.imwrite only returns False when the directory is missing.
    os.makedirs(out_dir, exist_ok=True)

    for img_path in tqdm(image_paths):
        img = misc.imread(img_path)
        img = img.astype(np.float64)
        if args.img_norm:
            img = img.astype(float) / 255.0

        # HWC -> CHW, then add the batch axis
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, 0)
        images = torch.from_numpy(img).float().to(device)

        # Inference only: do not record an autograd graph.
        with torch.no_grad():
            outputs = model(images)
            # dim=1 is what the implicit-dim call resolved to for 4-D input.
            probabilities = F.softmax(outputs, dim=1)

        max_prob = probabilities.data.max(1)[0]
        prob_img_format = np.squeeze(max_prob.cpu().numpy(), axis=0)
        avg_prob = np.mean(prob_img_format)
        print("Confidence Score for %s: \n%f" % (img_path, avg_prob))

        pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
        decoded = loader.decode_segmap(pred)
        out_path = out_dir + Path(img_path).name
        decoded_bgr = cv.cvtColor(decoded, cv.COLOR_RGB2BGR)
        cv.imwrite(out_path, decoded_bgr)
def get_sem_mask(model_file_name):
    """Predict a Cityscapes class-index mask for a fixed sample image.

    Args:
        model_file_name: checkpoint path; the part before its first "_" is
            used as the architecture name.

    Returns:
        Tuple ``(pred, img_orig)``: the predicted class-index array and the
        image as returned by ``cv2.imread``.

    Raises:
        Exception: if the image path does not end in ``png`` or ``jpg``.
        FileNotFoundError: if the image cannot be read.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    img_path = 'results/munich_000009_000019_leftImg8bit.png'
    if not img_path.endswith(('png', 'jpg')):
        raise Exception('Non PNG or JPG image!')
    print("Read Input Image from : %s" % (img_path))

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise FileNotFoundError("Could not read image: %s" % img_path)
    img_orig = img

    model_name = model_file_name[:model_file_name.find("_")]

    data_loader = get_loader('cityscapes')
    loader = data_loader(root=None, is_transform=True, test_mode=True)
    n_classes = loader.n_classes
    img = image_preproc(img, loader.img_size)

    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version='cityscapes')
    # map_location replaces the former bare `except:` retry on the CPU,
    # which also hid unrelated loading errors behind a second attempt.
    checkpoint = torch.load(model_file_name, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    # Inference only: do not record an autograd graph.
    with torch.no_grad():
        outputs = model(images)
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    return pred, img_orig
def test(args, cfg):
    """Segment the validation split and write one colour mask per image.

    Masks go to ``test_out/CAN_res50_4band_data07/<image stem>.png``.

    Args:
        args: namespace providing ``model_path``, ``img_path``, ``dataset``
            and ``img_norm``.
        cfg: configuration mapping with ``model``, ``data`` and ``training``
            sections.
    """
    # NOTE(review): the GPU index is hard-coded and overrides the environment.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset, config_file=cfg)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['val_split'],
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
    )
    valloader = data.DataLoader(v_loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=cfg['training']['n_workers'])

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    out_dir = "test_out/CAN_res50_4band_data07/"
    # cv.imwrite only returns False when the directory is missing.
    os.makedirs(out_dir, exist_ok=True)

    with torch.no_grad():
        for img_paths, images_val, _labels_val in tqdm(valloader):
            images_val = images_val.to(device)
            outputs = model(images_val)
            preds = outputs.data.max(1)[1].cpu().numpy()
            # Handle every sample in the batch: squeezing axis 0 and reading
            # only img_path[0] failed for any batch size other than 1.
            for img_name, pred in zip(img_paths, preds):
                decoded = loader.decode_segmap(pred)
                out_path = out_dir + Path(img_name).stem + ".png"
                decoded_bgr = cv.cvtColor(decoded, cv.COLOR_RGB2BGR)
                cv.imwrite(out_path, decoded_bgr)
def validate(cfg, args):
    """Score a model on the validation split with horizontal-flip averaging.

    Each batch is run as-is and flipped along dim 3; the two outputs are
    averaged before the argmax over dim 1.

    Args:
        cfg: configuration mapping with ``data``, ``training`` and ``model``
            sections.
        args: namespace providing ``model_path``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]
    loader = data_loader(
        data_path,
        split=cfg["data"]["val_split"],
        is_transform=True,
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
    )
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader,
                                batch_size=cfg["training"]["batch_size"],
                                num_workers=8)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()

    # Evaluation only: do not record autograd graphs for either pass.
    with torch.no_grad():
        for i, (images, labels) in enumerate(valloader):
            images = images.to(device)
            gt = labels.numpy()

            outputs = model(images).data.cpu().numpy()

            flipped_images = torch.flip(images, dims=(3, ))
            outputs_flipped = model(flipped_images)
            # Flip the prediction back so it lines up with the plain pass.
            outputs_flipped = torch.flip(outputs_flipped,
                                         dims=(3, )).data.cpu().numpy()

            outputs = (outputs + outputs_flipped) / 2.0
            pred = np.argmax(outputs, axis=1)
            running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    for i in range(n_classes):
        print(i, class_iou[i])
def load_model(self, config, modelfile):
    """Build the backbone network, optionally restore weights, and wrap it.

    Args:
        config: mapping with ``'backbone'`` (architecture name) and
            ``'classes'`` (passed to ``get_model`` as the class count).
        modelfile: checkpoint path; weights are loaded only if it is a file.

    Returns:
        The network wrapped in ``torch.nn.DataParallel`` over all CUDA
        devices reported by ``torch.cuda.device_count()``.
    """
    arch_spec = {'arch': config['backbone']}
    net = get_model(arch_spec, config['classes'])
    net = net.to(self.device)

    if os.path.isfile(modelfile):
        # The message is printed before the load, as in the original.
        print('loaded model from:', modelfile)
        checkpoint = torch.load(modelfile)
        net.load_state_dict(convert_state_dict(checkpoint["model_state"]))

    gpu_ids = range(torch.cuda.device_count())
    net = torch.nn.DataParallel(net, device_ids=gpu_ids)

    # Reset attributes on self; their consumers are outside this method.
    self.dummy_input = None
    self.graph_exported = False
    return net
def test(args):
    """Segment one image, optionally pool by a mask, and save the result.

    Args:
        args: namespace providing ``model_path``, ``img_path``, ``dataset``,
            ``img_norm``, ``mask_path`` (falsy to skip) and ``out_path``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The architecture name is the checkpoint file name up to the first "_".
    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    # Reverse the channel axis (presumably RGB -> BGR; confirm against loader).
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img.astype(float) / 255.0

    # HWC -> CHW, then add the batch axis
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    # Inference only: do not record an autograd graph.
    with torch.no_grad():
        outputs = model(images)
        if args.mask_path:
            print("Read Image Mask from : {}".format(args.mask_path))
            mask = torch.load(args.mask_path)
            mask = mask.to(device)
            outputs = to_super_to_pixels(outputs, mask)

    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    decoded = loader.decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def _load_model(self, cfg):
    """Create the ICNet-BN model for the 'vistas' loader and load weights.

    Also stores ``self.device`` and ``self.loader``.

    Args:
        cfg: mapping with ``'device'`` and a ``'testing'`` section holding
            ``'config_path'`` and ``'model_path'``.

    Returns:
        The model in eval mode, moved to ``self.device``.
    """
    self.device = torch.device(cfg['device'])

    loader_cls = get_loader('vistas')
    self.loader = loader_cls(root=cfg['testing']['config_path'],
                             is_transform=True,
                             test_mode=True)

    # Setup Model
    net = get_model({"arch": 'icnetBN'}, self.loader.n_classes)
    checkpoint = torch.load(cfg['testing']['model_path'])
    net.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    net.eval()
    net.to(self.device)
    return net
def infer(args):
    """Segment one image on the CPU, print the classifier output, save mask.

    Args:
        args: namespace providing ``img_path``, ``model_path`` and
            ``out_path``.
    """
    device = "cpu"

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)
    img = misc.imresize(img, (240, 240))
    # Reverse the channel axis (presumably RGB -> BGR; confirm).
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= np.array([104.00699, 116.66877, 122.67892])
    img = img.astype(float) / 255.0

    # HWC -> CHW, then add the batch axis
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup model
    model = get_model({"arch": "fcn8s"},
                      N_CLASSES,
                      version="mit_sceneparsing_benchmark")
    state = convert_state_dict(
        torch.load(args.model_path, map_location='cpu')["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Setup classifier
    # NOTE(review): no weights are loaded into the classifier here, so it
    # runs with whatever Classifier() initialises - confirm this is intended.
    classifier = Classifier()
    classifier.eval()
    classifier.to(device)

    images = img.to(device)
    # Inference only: do not record an autograd graph.
    with torch.no_grad():
        outputs = model(images)
        # outputs = F.avg_pool2d(outputs, 8)  # Uncomment to see the real feature map being used.
        pred_raw = outputs.data.max(1)[1]
        turn_logit = classifier(pred_raw.type(torch.FloatTensor) / N_CLASSES)

    pred = np.squeeze(pred_raw.cpu().numpy(), axis=0)
    print(turn_logit.detach().cpu().numpy())

    decoded = decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def init_model(args):
    """Prepare device, HarDNet model and 'icboard' loader for inference.

    Args:
        args: namespace providing ``size`` (a string evaluated to obtain the
            image size) and ``model_path``.

    Returns:
        Tuple ``(device, model, loader)``.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    loader_cls = get_loader("icboard")
    # NOTE(review): eval() executes arbitrary code from args.size; consider
    # ast.literal_eval if only tuple/int literals are ever passed.
    img_size = eval(args.size)
    loader = loader_cls(root=None,
                        is_transform=True,
                        img_size=img_size,
                        test_mode=True)

    # Setup Model
    net = get_model({"arch": "hardnet"}, loader.n_classes)
    checkpoint = torch.load(args.model_path, map_location=device)
    net.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    net.eval()
    net.to(device)

    return device, net, loader
def test(cfg):
    """Segment every file in a folder with ICNet-BN and save colour masks.

    Args:
        cfg: mapping with ``'device'`` and a ``'testing'`` section holding
            ``config_path``, ``model_path``, ``img_fold``, ``img_rows``,
            ``img_cols`` and ``output_fold``.
    """
    device = torch.device(cfg['device'])
    testing_cfg = cfg['testing']

    data_loader = get_loader('vistas')
    loader = data_loader(root=testing_cfg['config_path'],
                         is_transform=True,
                         test_mode=True)
    n_classes = loader.n_classes

    # Setup Model
    model_dict = {"arch": 'icnetBN'}
    model = get_model(model_dict, n_classes)
    # map_location lets a GPU-saved checkpoint load onto the configured device.
    checkpoint = torch.load(testing_cfg['model_path'], map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    for img_name in os.listdir(testing_cfg['img_fold']):
        img_path = os.path.join(testing_cfg['img_fold'], img_name)
        img = misc.imread(img_path)
        orig_size = img.shape[:-1]

        img = misc.imresize(
            img, (testing_cfg['img_rows'], testing_cfg['img_cols']))
        img = img.astype(np.float64)
        img = img.astype(float) / 255.0

        # HWC -> CHW, then add the batch axis
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, 0)
        img = torch.from_numpy(img).float()
        img = img.to(device)

        # Inference only: do not record an autograd graph.
        with torch.no_grad():
            outputs = model(img)

        outputs = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
        outputs = outputs.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        outputs = misc.imresize(outputs, orig_size, "nearest", mode="F")

        decoded = loader.decode_segmap(outputs)
        # Output name keeps the text before the first "." of the input name.
        output_path = os.path.join(testing_cfg['output_fold'],
                                   'mask_%s.png' % img_name.split('.')[0])
        misc.imsave(output_path, decoded)
def validate(args):
    """Run a model over a dataset split on the GPU and print its scores.

    Args:
        args: namespace providing ``gpus``, ``dataset``, ``split``,
            ``img_rows``, ``img_cols``, ``batch_size``, ``arch`` and
            ``model_path``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         is_transform=True,
                         img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader,
                                batch_size=args.batch_size,
                                num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(args.arch, n_classes)
    checkpoint = torch.load(args.model_path)
    state = convert_state_dict(checkpoint['model_state'])
    model.load_state_dict(state)
    print("Loaded checkpoint '{}' (epoch {})".format(args.model_path,
                                                     checkpoint['epoch']))
    model.eval()
    # Moving the model once is enough; it used to happen on every batch.
    model.cuda()

    # `volatile=True` has been a no-op since PyTorch 0.4, so gradients were
    # silently tracked; torch.no_grad() is the supported replacement.
    with torch.no_grad():
        for i, (images, labels) in tqdm(enumerate(valloader)):
            outputs = model(images.cuda())
            pred = outputs.data.max(1)[1].cpu().numpy()
            gt = labels.numpy()
            running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    for i in range(n_classes):
        # `classes` is a module-level name defined outside this function.
        print(i, classes[i], class_iou[i])
    print('\t'.join([str(class_iou[i]) for i in range(n_classes)]))
def test(args, cfg):
    """Run the two-input model over the test split and save colour masks.

    Masks go to ``test_out/changenet_change_det/<image stem>.png``.

    Args:
        args: namespace providing ``model_path`` and ``img_norm``.
        cfg: configuration mapping with ``data``, ``training`` and ``model``
            sections.
    """
    # NOTE(review): the GPU index is hard-coded and overrides the environment.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_loader = get_loader(cfg['data']['dataset'])
    data_path = get_data_path(cfg['data']['dataset'], config_file=cfg)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    t_loader = data_loader(
        data_path,
        is_transform=True,
        split='test',
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
    )
    # batch_size stays 1: the loop below squeezes the batch axis.
    testloader = data.DataLoader(t_loader,
                                 batch_size=1,
                                 num_workers=cfg['training']['n_workers'])

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    out_dir = "test_out/changenet_change_det/"
    # cv.imwrite only returns False when the directory is missing.
    os.makedirs(out_dir, exist_ok=True)

    with torch.no_grad():
        for img_path, image_src, image_dst in tqdm(testloader):
            img_name = img_path[0]
            image_src = image_src.to(device)
            image_dst = image_dst.to(device)

            outputs = model(image_src, image_dst)
            pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
            decoded = loader.decode_segmap(pred)

            out_path = out_dir + Path(img_name).stem + ".png"
            decoded_bgr = cv.cvtColor(decoded, cv.COLOR_RGB2BGR)
            cv.imwrite(out_path, decoded_bgr)
def run():
    """Segment a fixed crop of every frame and show/save the overlay.

    Paths, crop geometry and the class count (2) are hard-coded. Pressing
    ``q`` in the preview window releases the data loader and stops the loop.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    mean = np.array([104.00699, 116.66877, 122.67892])

    dataloader = DataLoader('E:/Autopilot/input/vc', 'E:/Autopilot/output/vc')

    model_path = "E:/Autopilot/pytorch-semseg-master/runs/39060/fcn8s_camvid_best_model.pkl"
    # The architecture name is the checkpoint file name up to the first "_".
    model_file_name = os.path.split(model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]

    model_dict = {"arch": model_name}
    model = get_model(model_dict, 2, version='camvid')
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Region of interest in frame coordinates (alternative tried: x=520, y=770).
    x = 550
    y = 680
    crop_h = 304
    crop_w = 1085

    buffer = []
    for img0, _, _, _, frame in dataloader:
        if frame == 1:
            # The buffer is reset whenever the loader reports frame 1.
            buffer = []

        crop = img0[y:y + crop_h, x:x + crop_w]
        img = preproc_img(crop, mean)
        img = img.to(device)

        # Inference only: do not record an autograd graph per frame.
        with torch.no_grad():
            outputs = model(img)
        pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
        decoded = decode_segmap(pred)

        res = overlay_mask(crop, decoded)
        res, buffer = foo(crop, res, decoded, buffer, x_l=455, y_l=180)

        # Paste the processed crop back into the full frame before saving.
        img0[y:y + crop_h, x:x + crop_w] = res
        dataloader.save_results(img0)

        cv2.imshow('123', res)
        if cv2.waitKey(1) == ord('q'):
            dataloader.release()
            break
def load_model_and_preprocess(cfg, args, n_classes, device):
    """Build the configured model and load its checkpoint for evaluation.

    Args:
        cfg: configuration mapping with ``model``, ``logdir`` and
            ``training`` entries.
        args: namespace providing ``model_path``; ``args.steps`` is set from
            the config when the architecture name contains 'NoParamShare'.
        n_classes: class count forwarded to ``get_model``.
        device: target device for the model.

    Returns:
        Tuple ``(model, model_path)`` with the model in eval mode and the
        checkpoint path that was actually used.
    """
    model_cfg = cfg['model']
    if 'NoParamShare' in model_cfg['arch']:
        args.steps = model_cfg['steps']

    net = get_model(model_cfg, n_classes, args).to(device)

    # Use the explicit path when it exists, else the resume checkpoint
    # under the log directory.
    if not os.path.exists(args.model_path):
        model_path = pjoin(cfg['logdir'], cfg['training']['resume'])
    else:
        model_path = args.model_path

    # Returning the storage unchanged from map_location leaves it where it
    # was deserialised.
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    net.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    net.eval()
    net.to(device)
    return net, model_path
def __init__(self):
    """Set up the segmentation network, its input size and colour map.

    The dataset, model name and checkpoint path are hard-coded below.

    Raises:
        ValueError: if ``dataset`` is edited to an unsupported value.
    """
    self.img_width, self.img_height = 640, 480
    print('Setting up CNN model...')

    # Set device (GPU when available, otherwise CPU)
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    dataset = 'ade20k'
    model_name = 'pspnet'
    model_path = '/home/yubao/data/Dataset/semantic_slam/pspnet_50_ade20k.pth'

    # Mean value of dataset; the same constant was set in both branches.
    self.mean = np.array([104.00699, 116.66877, 122.67892])

    if dataset == 'sunrgbd':
        # Version fine tuned on the sunrgbd dataset
        self.n_classes = 38  # Semantic class number
        self.model = get_model(model_name, self.n_classes, version='sunrgbd_res50')
        # map_location follows self.device so the CPU fallback still loads.
        state = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(state)
        self.cnn_input_size = (321, 321)
    elif dataset == 'ade20k':
        self.n_classes = 150  # Semantic class number
        self.model = get_model(model_name, self.n_classes, version='ade20k')
        state = torch.load(model_path, map_location=self.device)
        # Remove 'module' from dictionary keys
        self.model.load_state_dict(convert_state_dict(state['model_state']))
        self.cnn_input_size = (473, 473)
    else:
        raise ValueError("Unsupported dataset: {}".format(dataset))

    self.model = self.model.to(self.device)
    self.model.eval()

    # Color map for semantic classes
    self.cmap = color_map(N=self.n_classes, normalized=False)
def _load_teacher_model(cfg, n_classes, device):
    """Build the teacher network from cfg['training']['pretrained_model']."""
    model_file_name = cfg['training'].get('pretrained_model', None)
    # The architecture name is the checkpoint file name up to the first "_".
    model_name = {'arch': model_file_name.split('/')[-1].split('_')[0]}
    teacher_model = get_model(model_name, n_classes)
    pretrained = torch.load(model_file_name, map_location=device)
    teacher_model.load_state_dict(
        convert_state_dict(pretrained["model_state"]))
    teacher_model.eval()
    teacher_model.to(device)
    return teacher_model


def _intermediate_distillation_loss(criterion, student_feats, teacher_feats):
    """Log the four per-layer feature losses and return their mean."""
    m1, m2, m3, m4 = [
        criterion(s, t) for s, t in zip(student_feats, teacher_feats)
    ]
    log('mediate1_loss: {}, mediate2_loss: {}, mediate3_loss: {}, mediate4_loss: {}'
        .format(m1, m2, m3, m4))
    return (m1 + m2 + m3 + m4) / 4


def train(cfg, writer, logger):
    """Train the configured model, optionally distilling from a teacher.

    One pass over the training loader counts as one "iteration". The best
    checkpoint, chosen by lowest average training loss at validation
    intervals, is written under the writer's log directory.

    Args:
        cfg: configuration mapping with ``data``, ``training`` and ``model``
            sections, plus optional ``seed`` and an ``id`` used for the
            experiment log.
        writer: summary writer exposing ``add_scalar`` and
            ``file_writer.get_logdir()``.
        logger: logger used for the optimizer and loss descriptions.

    Raises:
        ValueError: if intermediate distillation is requested for an
            architecture that returns no intermediate feature maps.
    """
    train_cfg = cfg['training']

    # Setup dataset split before setting up the seed for random
    data_split_info = init_data_split(cfg['data']['path'],
                                      cfg['data'].get('split_ratio', 0),
                                      cfg['data'].get('compound', False))

    # Setup seeds
    seed = cfg.get('seed', 1337)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Cross Entropy Weight
    if train_cfg['loss']['name'] != 'regression_l1':
        weight = prep_class_val_weights(train_cfg['cross_entropy_ratio'])
    else:
        weight = None
    log('Using loss : {}'.format(train_cfg['loss']['name']))

    # Setup Augmentations (None when the config lists none)
    augmentations = train_cfg.get('augmentations', None)
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']
    patch_size = list(train_cfg['patch_size'].values())

    t_loader = data_loader(data_path,
                           split=cfg['data']['train_split'],
                           augmentations=data_aug,
                           data_split_info=data_split_info,
                           patch_size=patch_size,
                           allow_empty_patch=train_cfg.get(
                               'allow_empty_patch', True),
                           n_classes=train_cfg.get('n_classes', 1))
    n_classes = t_loader.n_classes
    log('n_classes is: {}'.format(n_classes))

    # NOTE(review): shuffle=False for a training loader is unusual - confirm.
    trainloader = data.DataLoader(t_loader,
                                  batch_size=train_cfg['batch_size'],
                                  num_workers=train_cfg['n_workers'],
                                  shuffle=False)

    # Setup Model
    model = get_model(cfg['model'], n_classes).to(device)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {
        k: v
        for k, v in train_cfg['optimizer'].items() if k != 'name'
    }
    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger.info("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, train_cfg['lr_schedule'])
    loss_fn = get_loss_function(cfg)
    logger.info("Using loss {}".format(loss_fn))

    min_loss = None
    start_iter = 0
    if train_cfg['resume'] is not None:
        log('resume saved model')
        if os.path.isfile(train_cfg['resume']):
            display("Loading model and optimizer from checkpoint '{}'".format(
                train_cfg['resume']))
            checkpoint = torch.load(train_cfg['resume'])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            min_loss = checkpoint["min_loss"]
            if min_loss is not None:
                # Older checkpoints stored a tensor here; normalise to float.
                min_loss = float(min_loss)
            display("Loaded checkpoint '{}' (iter {})".format(
                train_cfg['resume'], checkpoint["epoch"]))
        else:
            display("No checkpoint found at '{}'".format(train_cfg['resume']))
            log('no saved model found')

    arch = cfg['model']['arch']
    single_output_arch = arch in ('unet3dreg', 'unet3d')
    is_regression = train_cfg.get('task', 'regression') == 'regression'
    fed_by_teacher = train_cfg.get('fed_by_teacher', False)
    fed_by_intermediate = train_cfg.get('fed_by_intermediate', False)

    if fed_by_intermediate and single_output_arch:
        raise ValueError(
            "fed_by_intermediate needs intermediate feature maps, which "
            "arch '{}' does not return".format(arch))

    # One L1 criterion serves the regression hard loss and every
    # distillation term. It used to share the name `loss` with the loss
    # tensor and was only created for regression, which broke distillation
    # for other tasks.
    l1_criterion = nn.L1Loss()

    # Build the teacher once. It used to be re-created and re-read from disk
    # for every batch.
    teacher_model = None
    if fed_by_teacher or fed_by_intermediate:
        teacher_model = _load_teacher_model(cfg, n_classes, device)

    time_meter = averageMeter()
    save_path = None
    i_train_iter = start_iter
    display('Training from {}th iteration\n'.format(i_train_iter))

    while i_train_iter < train_cfg['train_iters']:
        i_batch_idx = 0
        train_iter_start_time = time.time()
        averageLoss = 0.0

        # training
        for (images, labels) in trainloader:
            start_ts = time.time()
            # NOTE(review): stepping the scheduler before optimizer.step()
            # is the pre-1.1 PyTorch order; kept to preserve the schedule.
            scheduler.step()
            model.train()
            images = images.to(device)
            labels = labels.to(device)

            # -1 marks a loss term that is not used for this batch.
            soft_loss = -1
            mediate_average_loss = -1

            optimizer.zero_grad()
            if single_output_arch:
                outputs = model(images)
                student_feats = None
            else:
                (outputs, myconv1_copy, myconv3_copy, myup2_copy,
                 myup1_copy) = model(images)
                student_feats = (myconv1_copy, myconv3_copy, myup2_copy,
                                 myup1_copy)

            if is_regression:
                hard_loss = l1_criterion(outputs, labels)
            else:
                hard_loss = loss_fn(
                    input=outputs,
                    target=labels,
                    weight=weight,
                    size_average=train_cfg['loss']['size_average'])

            if teacher_model is not None:
                # Teacher outputs are fixed targets; no gradients needed.
                with torch.no_grad():
                    (outputs_teacher, conv1_copy, conv3_copy, up2_copy,
                     up1_copy) = teacher_model(images)
                teacher_feats = (conv1_copy, conv3_copy, up2_copy, up1_copy)

            if fed_by_teacher:
                soft_loss = l1_criterion(outputs, outputs_teacher)
                loss_hard_soft = hard_loss + 0.1 * soft_loss
                if fed_by_intermediate:
                    mediate_average_loss = _intermediate_distillation_loss(
                        l1_criterion, student_feats, teacher_feats)
                    loss = loss_hard_soft + 0.1 * mediate_average_loss
                else:
                    loss = 0.9 * hard_loss + 0.1 * soft_loss
            elif fed_by_intermediate:
                mediate_average_loss = _intermediate_distillation_loss(
                    l1_criterion, student_feats, teacher_feats)
                loss = 0.9 * hard_loss + 0.1 * mediate_average_loss
            else:
                loss = hard_loss

            log('==> hard loss: {} soft loss: {} mediate loss: {}'.format(
                hard_loss, soft_loss, mediate_average_loss))

            # Accumulate a Python float: adding the tensor kept every
            # batch's autograd graph alive for the whole pass.
            loss_value = loss.item()
            averageLoss += loss_value

            loss.backward()
            optimizer.step()
            time_meter.update(time.time() - start_ts)

            # Print on every batch when configured, else only on the last.
            print_per_batch_check = True if train_cfg[
                'print_interval_per_batch'] else i_batch_idx + 1 == len(
                    trainloader)
            if (i_train_iter + 1) % train_cfg[
                    'print_interval'] == 0 and print_per_batch_check:
                fmt_str = "Iter [{:d}/{:d}] Loss: {:.4f} Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    i_train_iter + 1, train_cfg['train_iters'], loss_value,
                    time_meter.avg / train_cfg['batch_size'])
                display(print_str)
                writer.add_scalar('loss/train_loss', loss_value,
                                  i_train_iter + 1)
                time_meter.reset()
            i_batch_idx += 1

        time_for_one_iteration = time.time() - train_iter_start_time
        n_samples = len(trainloader) * train_cfg['batch_size']
        display(
            'EntireTime for {}th training iteration: {} EntireTime/Image: {}'.
            format(i_train_iter + 1, time_converter(time_for_one_iteration),
                   time_converter(time_for_one_iteration / n_samples)))
        averageLoss /= n_samples

        # "validation": no separate validation loader is used here; the
        # best model is chosen by the average training loss.
        validation_check = (i_train_iter + 1) % train_cfg['val_interval'] == 0 or \
            (i_train_iter + 1) == train_cfg['train_iters']
        if not validation_check:
            print('no validation check')
        else:
            log('Validation: average loss for current iteration is: {}'.format(
                averageLoss))
            if min_loss is None:
                min_loss = averageLoss
            # Update the best model whenever the loss does not get worse.
            if averageLoss <= min_loss:
                min_loss = averageLoss
                state = {
                    "epoch": i_train_iter + 1,
                    "model_state": model.state_dict(),
                    "optimizer_state": optimizer.state_dict(),
                    "scheduler_state": scheduler.state_dict(),
                    "min_loss": min_loss
                }
                save_path = os.path.join(
                    os.getcwd(), writer.file_writer.get_logdir(),
                    "{}_{}_model_best.pkl".format(cfg['model']['arch'],
                                                  cfg['data']['dataset']))
                print('save_path is: ' + save_path)
                torch.save(state, save_path)

        i_train_iter += 1

    # Record the checkpoint path only if this run saved one; `save_path`
    # used to be unbound otherwise.
    if save_path is not None:
        # NOTE(review): hard-coded, user-specific log location ("to change").
        with open('/home/heng/Research/isbi/log_final_experiment_flyJanelia.txt', 'a') as f:
            run_id = cfg['id']
            f.write(str(run_id) + ':' + save_path + '\n')
def test(args):
    """Segment the single image at ``args.img_path`` and save the decoded mask.

    Reads from ``args``: ``model_path`` (the architecture name is the part of
    the file name before the first ``_``), ``img_path``, ``dataset``,
    ``img_norm``, ``dcrf`` and ``out_path``.

    Fixes over the previous version:
      * a stray ``for ... in dataloader`` loop read ``dataloader`` before it
        was assigned (and its loop variables were overwritten at once); the
        function processes exactly one image, so the loop is removed;
      * the CRF result was written to the literal path ``"{}/{}.jpg"`` while
        ``dcrf_path`` was reported as the destination; it now goes to
        ``dcrf_path``;
      * the forward pass runs under ``torch.no_grad()``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[: model_file_name.find("_")]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None, is_transform=True,
                         img_norm=args.img_norm, test_mode=True)
    n_classes = loader.n_classes

    # Copy of the input at the loader's image size; only used as the
    # reference image of the dense CRF below.
    resized_img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]),
                                interp="bicubic")

    orig_size = img.shape[:-1]
    if model_name in ["pspnet", "icnet", "icnetBN"]:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(
            img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))

    img = img[:, :, ::-1]  # reverse the order of the last (channel) axis
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img.astype(float) / 255.0

    # NHWC -> NCHW
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    state = convert_state_dict(torch.load(args.model_path)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    with torch.no_grad():  # inference only, no autograd graph needed
        outputs = model(images)

    if args.dcrf:
        unary = outputs.data.cpu().numpy()
        unary = np.squeeze(unary, 0)
        # NOTE(review): -log() yields NaN/inf for non-positive values;
        # presumably this expects softmax probabilities -- confirm what the
        # model returns.
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, _ = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + "_drf.png"
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    if model_name in ["pspnet", "icnet", "icnetBN"]:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, "nearest", mode="F")

    decoded = loader.decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def validate(cfg, args):
    """Run the model over the validation split and print the metrics.

    ``cfg`` supplies ``data`` (``dataset``, ``val_split``, ``img_rows`` and
    optionally ``img_cols``) and ``training.batch_size``. ``args`` supplies
    ``model_path`` (the architecture name is the file-name prefix before the
    first ``_``), ``eval_flip`` and ``measure_time``.

    Fixes over the previous version:
      * the image size was built as ``(img_rows, img_rows)``; the width now
        comes from ``img_cols`` and falls back to ``img_rows`` when the
        config has no such key, so square configs behave as before;
      * forward passes run under ``torch.no_grad()``;
      * the timing message no longer contains a stray backslash left over
        from a broken line continuation.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]

    # Setup Dataloader
    data_cfg = cfg['data']
    data_loader = get_loader(data_cfg['dataset'])
    data_path = get_data_path(data_cfg['dataset'])

    img_rows = data_cfg['img_rows']
    img_cols = data_cfg.get('img_cols', img_rows)

    loader = data_loader(
        data_path,
        split=data_cfg['val_split'],
        is_transform=True,
        img_size=(img_rows, img_cols),
    )

    n_classes = loader.n_classes
    valloader = data.DataLoader(loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=8)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(model_name, n_classes, version=data_cfg['dataset'])
    state = convert_state_dict(torch.load(args.model_path)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    for i, (images, labels) in enumerate(valloader):
        start_time = timeit.default_timer()

        images = images.to(device)

        with torch.no_grad():  # evaluation only
            if args.eval_flip:
                outputs = model(images)

                # Flip images in numpy (not support in tensor)
                outputs = outputs.data.cpu().numpy()
                flipped_images = np.copy(
                    images.data.cpu().numpy()[:, :, :, ::-1])
                flipped_images = torch.from_numpy(
                    flipped_images).float().to(device)
                outputs_flipped = model(flipped_images)
                outputs_flipped = outputs_flipped.data.cpu().numpy()
                # Average the plain output with the un-flipped output of the
                # flipped input, then take the per-pixel arg-max class.
                outputs = (outputs + outputs_flipped[:, :, :, ::-1]) / 2.0

                pred = np.argmax(outputs, axis=1)
            else:
                outputs = model(images)
                pred = outputs.data.max(1)[1].cpu().numpy()

        gt = labels.numpy()

        if args.measure_time:
            elapsed_time = timeit.default_timer() - start_time
            print("Inference time (iter {0:5d}): {1:3.5f} fps".format(
                i + 1, pred.shape[0] / elapsed_time))
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
def test(args):
    """Segment the single image at ``args.img_path`` and save the decoded mask.

    Reads from ``args``: ``model_path`` (the architecture name is the part of
    the file name before the first ``_``), ``img_path``, ``dataset``,
    ``img_norm``, ``dcrf`` and ``out_path``.

    The deprecated ``Variable(..., volatile=True)`` wrappers are replaced by
    an explicit device plus ``torch.no_grad()``: ``volatile`` no longer
    disables autograd in current PyTorch, so the old code silently built a
    graph during inference. GPU 0 is still used when CUDA is available.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find('_')]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    # Copy of the input at the loader's image size; only used as the
    # reference image of the dense CRF below.
    resized_img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]),
                                interp='bicubic')

    orig_size = img.shape[:-1]
    if model_name in ['pspnet', 'icnet', 'icnetBN']:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(
            img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))

    img = img[:, :, ::-1]  # reverse the order of the last (channel) axis
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img.astype(float) / 255.0

    # NHWC -> NCHW
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model = get_model(model_name, n_classes, version=args.dataset)
    state = convert_state_dict(torch.load(args.model_path)['model_state'])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    with torch.no_grad():  # inference only, no autograd graph needed
        outputs = model(images)
    # outputs = F.softmax(outputs, dim=1)

    if args.dcrf:
        unary = outputs.data.cpu().numpy()
        unary = np.squeeze(unary, 0)
        # NOTE(review): -log() yields NaN/inf for non-positive values;
        # presumably this expects softmax probabilities (see the disabled
        # softmax above) -- confirm what the model returns.
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, _ = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + '_drf.png'
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    if model_name in ['pspnet', 'icnet', 'icnetBN']:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, 'nearest', mode='F')

    decoded = loader.decode_segmap(pred)
    print('Classes found: ', np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def validate(cfg, args):
    """Evaluate a multi-head model image by image and log the metrics to CSV.

    Per-image scores are appended to ``args.output_csv_path``; overall and
    class-wise scores plus the confusion matrix of every metric are appended
    to ``args.miou_logs_path``. Other ``args`` fields read here:
    ``model_path``, ``dataset_split``, ``bn_fusion``, ``update_bn``,
    ``save_image``, ``output_path`` and ``measure_time``.

    Fixes over the previous version:
      * the checkpoint is mapped to the selected device instead of the
        hard-coded ``"cuda:0"``, and ``torch.cuda.synchronize()`` is only
        called on CUDA, so the CPU fallback actually works;
      * the total frame rate is computed from the number of timed frames
        (the old code used the last zero-based index) and is skipped when
        nothing was timed, which used to raise ``ZeroDivisionError``
        whenever ``args.measure_time`` was off.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    on_cuda = device.type == "cuda"

    # Setup Augmentations
    data_aug = None
    if "validation" in cfg:
        augmentations = cfg["validation"].get("augmentations", None)
        if cfg["data"]["dataset"] == "softmax_cityscapes_convention":
            data_aug = get_composed_augmentations_softmax(augmentations)
        else:
            data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    loader = data_loader(
        data_path,
        config=cfg["data"],
        is_transform=True,
        split=cfg["data"][args.dataset_split],
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
        augmentations=data_aug,
    )

    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=1, num_workers=1)

    # Setup Metrics: one for segmentation plus one per configured classifier
    running_metrics_val = {"seg": runningScoreSeg(n_classes)}
    if "classifiers" in cfg["data"]:
        for name, classes in cfg["data"]["classifiers"].items():
            running_metrics_val[name] = runningScoreClassifier(len(classes))
    if "bin_classifiers" in cfg["data"]:
        for name, classes in cfg["data"]["bin_classifiers"].items():
            running_metrics_val[name] = runningScoreClassifier(2)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    state = torch.load(args.model_path, map_location=device)["model_state"]
    # converts from dataParallel module to normal module
    state = convert_state_dict(state)
    model.load_state_dict(state, strict=False)

    if args.bn_fusion:
        model = fuse_bn_recursively(model)

    if args.update_bn:
        print("Reset BatchNorm and recalculate mean/var")
        model.apply(reset_batchnorm)
        model.train()
    else:
        model.eval()  # set batchnorm and dropouts to work in eval mode
    model.to(device)

    total_time = 0
    timed_frames = 0  # frames whose inference time went into total_time

    total_params = sum(p.numel() for p in model.parameters())
    print('Parameters: ', total_params)

    # stat(model, (3, 1024, 2048))
    torch.backends.cudnn.benchmark = True

    with open(args.output_csv_path, 'a') as output_csv:
        output_csv.write(create_overall_logs_header(running_metrics_val))

        for i, (images, label_dict, fname) in enumerate(valloader):
            images = images.to(device)

            # Synchronise around the forward pass so the wall-clock
            # interval covers the GPU work.
            if on_cuda:
                torch.cuda.synchronize()
            start_time = time.perf_counter()
            with torch.no_grad():  # deactivates autograd engine, less mem usage
                output_dict = model(images)
            if on_cuda:
                torch.cuda.synchronize()
            elapsed_time = time.perf_counter() - start_time

            if args.save_image:
                save_image(images, output_dict, fname, args.output_path,
                           loader=loader)

            image_score = []
            # update running metrics and record imagewise metrics
            for name, metrics in running_metrics_val.items():
                gt_array = label_dict[name].data.cpu().numpy()

                loss_key = name + '_loss'
                if loss_key in cfg['training'] and \
                        cfg['training'][loss_key]['name'] == 'l1':
                    # for binary classification: sign of the output, with
                    # -1 mapped to 0 in prediction and ground truth
                    pred_array = output_dict[name].data.cpu().numpy()
                    pred_array = np.sign(pred_array)
                    pred_array[pred_array == -1] = 0
                    gt_array[gt_array == -1] = 0
                else:
                    pred_array = output_dict[name].data.max(1)[1].cpu().numpy()

                if name == "seg" or name == "softmax":
                    image_score.append(
                        "%.3f" % metrics.get_image_score(gt_array, pred_array))
                else:
                    imagewise_score = softmax(np.squeeze(
                        output_dict[name].data.cpu().numpy())).round(3)
                    image_score.append(
                        "%.3f" % (imagewise_score[gt_array[0]]))
                    # append raw probability results for non-segmentation task
                    image_score.append(str(imagewise_score))
                    image_score.append(
                        "pred %s label %s" % (np.argmax(imagewise_score),
                                              gt_array[0]))

                metrics.update(gt_array, pred_array)

            # record imagewise metrics
            output_csv.write(
                '%s, %.4f, %s\n' % (fname[0], 1 / elapsed_time,
                                    ",".join(image_score)))

            if args.measure_time:
                total_time += elapsed_time
                timed_frames += 1
                print(
                    "Iter {0:5d}: {1:3.5f} fps {2}".format(
                        i + 1, 1 / elapsed_time, " ".join(image_score)
                    )
                )

    if total_time > 0:
        print("Total Frame Rate = %.2f fps" % (timed_frames / total_time))

    if args.update_bn:
        model = torch.nn.DataParallel(
            model, device_ids=range(torch.cuda.device_count()))
        state2 = {"model_state": model.state_dict()}
        torch.save(state2, 'hardnet_cityscapes_mod.pth')

    # record overall metrics
    with open(args.miou_logs_path, 'a') as main_output_csv:
        main_output_csv.write('%s\n' % args.output_csv_path)

        for name, metrics in running_metrics_val.items():
            overall, classwise = metrics.get_scores()

            for k, v in overall.items():
                print("{}_{}: {}".format(name, k, v))
                main_output_csv.write("%s,%s,%s\n" % (name, k, v))

            for metric_name, metric in classwise.items():
                for k, v in metric.items():
                    print("{}_{}_{}: {}".format(name, metric_name, k, v))
                    main_output_csv.write(
                        "%s,%s,%s,%s\n" % (name, metric_name, k, v))

            confusion_matrix = np.round(metrics.confusion_matrix, 3)
            print("confusion matrix:\n%s" % confusion_matrix)
            main_output_csv.write(
                "%s\n" % ("\n".join(str(i) for i in confusion_matrix)))
def validate(cfg, args):
    """Run the model over the validation split and print the metrics.

    ``cfg`` supplies ``data`` (``dataset``, ``path``, ``val_split``,
    ``img_rows``, ``img_cols``, ``fold``, ``n_classes``), ``model`` and
    ``training.batch_size``; ``args`` supplies ``model_path`` and
    ``measure_time``.

    Fixes over the previous version:
      * the forward pass was retried forever inside a bare ``except:``,
        which hung on any persistent error and swallowed
        ``KeyboardInterrupt``; it is now retried a bounded number of times
        on ``RuntimeError`` only and the last error is re-raised;
      * the forward pass runs under ``torch.no_grad()``;
      * the timing message no longer contains a stray backslash left over
        from a broken line continuation.

    Raises:
        RuntimeError: if the forward pass still fails on the last attempt.
    """
    max_attempts = 3  # forward-pass attempts per batch before giving up

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']

    loader = data_loader(
        data_path,
        split=cfg['data']['val_split'],
        is_transform=True,
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        fold=cfg['data']['fold'],
        n_classes=cfg['data']['n_classes']
    )

    n_classes = loader.n_classes
    valloader = data.DataLoader(loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=1)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg['model'], n_classes).to(device)
    state = convert_state_dict(torch.load(args.model_path)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    for i, (images, labels) in enumerate(valloader):
        start_time = timeit.default_timer()

        images = images.to(device)

        for attempt in range(1, max_attempts + 1):
            try:
                with torch.no_grad():  # evaluation only
                    outputs = model(images)
                break
            except RuntimeError:
                print('Caught an exception with image ', i)
                # Release cached GPU memory before the next attempt.
                torch.cuda.empty_cache()
                if attempt == max_attempts:
                    raise

        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.numpy()

        if args.measure_time:
            elapsed_time = timeit.default_timer() - start_time
            print(
                "Inference time (iter {0:5d}): {1:3.5f} fps".format(
                    i + 1, pred.shape[0] / elapsed_time
                )
            )
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
def train(cfg, writer, logger):
    """Train the configured model, validating and checkpointing periodically.

    Args:
        cfg: configuration mapping; reads ``seed``, ``data``, ``model`` and
            ``training`` (batch size, workers, optimizer, lr schedule,
            resume path, iteration count, print/validation intervals).
        writer: summary writer; receives scalar logs and provides the log
            directory in which the best checkpoint is stored.
        logger: logger for progress messages.

    Fix over the previous version: ``scheduler.step()`` was called before
    ``optimizer.step()``. It is now called after it, the order PyTorch
    documents; the old order skips the first value of the schedule.
    """
    # Setup seeds
    seed = cfg.get("seed", 1337)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Augmentations
    augmentations = cfg["training"].get("augmentations", None)
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]
    img_size = (cfg["data"]["img_rows"], cfg["data"]["img_cols"])

    t_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg["data"]["train_split"],
        img_size=img_size,
        augmentations=data_aug,
    )

    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg["data"]["val_split"],
        img_size=img_size,
    )

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(
        t_loader,
        batch_size=cfg["training"]["batch_size"],
        num_workers=cfg["training"]["n_workers"],
        shuffle=True,
    )

    valloader = data.DataLoader(v_loader,
                                batch_size=cfg["training"]["batch_size"],
                                num_workers=cfg["training"]["n_workers"])

    # Setup Metrics
    running_metrics_val = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    # NOTE(review): `args` is not a parameter of this function; this only
    # works if a module-level `args` exists when train() runs -- confirm.
    state = convert_state_dict(torch.load(args.model_path))
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {
        k: v
        for k, v in cfg["training"]["optimizer"].items() if k != "name"
    }

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger.info("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, cfg["training"]["lr_schedule"])

    loss_fn = get_loss_function(cfg)
    logger.info("Using loss {}".format(loss_fn))

    start_iter = 0
    resume_path = cfg["training"]["resume"]
    if resume_path is not None:
        if os.path.isfile(resume_path):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    resume_path))
            checkpoint = torch.load(resume_path)
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            logger.info("Loaded checkpoint '{}' (iter {})".format(
                resume_path, checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(resume_path))

    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    best_iou = -100.0
    i = start_iter
    flag = True  # cleared once the configured iteration count is reached

    while i <= cfg["training"]["train_iters"] and flag:
        for (images, labels) in trainloader:
            i += 1
            start_ts = time.time()
            model.train()

            # Print the label range when it falls outside [0, n_classes].
            if torch.max(labels) > n_classes or torch.min(labels) < 0:
                print(torch.min(labels), torch.max(labels))

            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            loss = loss_fn(input=outputs, target=labels)

            loss.backward()
            optimizer.step()
            # Advance the learning-rate schedule after the optimizer update.
            scheduler.step()

            time_meter.update(time.time() - start_ts)

            if (i + 1) % cfg["training"]["print_interval"] == 0:
                fmt_str = "Iter [{:d}/{:d}]  Loss: {:.4f}  Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    i + 1,
                    cfg["training"]["train_iters"],
                    loss.item(),
                    time_meter.avg / cfg["training"]["batch_size"],
                )

                print(print_str)
                logger.info(print_str)
                writer.add_scalar("loss/train_loss", loss.item(), i + 1)
                time_meter.reset()

            if (i + 1) % cfg["training"]["val_interval"] == 0 or (
                    i + 1) == cfg["training"]["train_iters"]:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val,
                                labels_val) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)

                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)

                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()

                        running_metrics_val.update(gt, pred)
                        val_loss_meter.update(val_loss.item())

                writer.add_scalar("loss/val_loss", val_loss_meter.avg, i + 1)
                logger.info("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))

                score, class_iou = running_metrics_val.get_scores()
                for k, v in score.items():
                    print(k, v)
                    logger.info("{}: {}".format(k, v))
                    writer.add_scalar("val_metrics/{}".format(k), v, i + 1)

                for k, v in class_iou.items():
                    logger.info("{}: {}".format(k, v))
                    writer.add_scalar("val_metrics/cls_{}".format(k), v, i + 1)

                val_loss_meter.reset()
                running_metrics_val.reset()

                # Keep the checkpoint with the best mean IoU seen so far.
                if score["Mean IoU : \t"] >= best_iou:
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join(
                        writer.file_writer.get_logdir(),
                        "{}_{}_best_model.pkl".format(cfg["model"]["arch"],
                                                      cfg["data"]["dataset"]),
                    )
                    torch.save(state, save_path)

            if (i + 1) == cfg["training"]["train_iters"]:
                flag = False
                break
def __init__(self, gen_pcl=True):
    """Set up the segmentation network and the ROS publishers/subscribers.

    Args:
        gen_pcl (bool): whether to generate a point cloud. If true, the
            node subscribes to a synchronised colour/depth image pair and
            publishes a point cloud; otherwise it subscribes to the colour
            image only.

    Raises:
        ValueError: if ``/semantic_pcl/dataset`` names an unsupported
            dataset.

    Changes over the previous version: the CNN setup, which was duplicated
    verbatim in two mutually exclusive branches, now lives in
    ``_setup_cnn_model``; checkpoints are mapped to the selected device
    instead of a hard-coded ``'cuda:0'``; and an unsupported dataset raises
    a clear error instead of failing later on a missing ``self.model``.
    """
    # Get point type
    point_type = rospy.get_param('/semantic_pcl/point_type')
    if point_type == 0:
        self.point_type = PointType.COLOR
        print('Generate color point cloud.')
    elif point_type == 1:
        self.point_type = PointType.SEMANTICS_MAX
        print('Generate semantic point cloud [max fusion].')
    elif point_type == 2:
        self.point_type = PointType.SEMANTICS_BAYESIAN
        print('Generate semantic point cloud [bayesian fusion].')
    else:
        # Leaves the object without any further attributes, as before.
        print("Invalid point type.")
        return

    # Get image size
    # self.img_width, self.img_height = rospy.get_param('/camera/width'), rospy.get_param('/camera/height')
    self.img_width, self.img_height = 640, 480

    # Set up CNN.
    # NOTE(review): the previous code set the network up both when the
    # point type was COLOR and when it was not, i.e. always. If a pure
    # colour cloud does not need the network, gate this call -- confirm.
    print('Setting up CNN model...')
    self._setup_cnn_model()

    # Declare array containers
    if self.point_type is PointType.SEMANTICS_BAYESIAN:
        # Numpy array to store 3 decoded semantic images with highest confidences
        self.semantic_colors = np.zeros(
            (3, self.img_height, self.img_width, 3), dtype=np.uint8)
        # Numpy array to store top 3 class confidences
        self.confidences = np.zeros(
            (3, self.img_height, self.img_width), dtype=np.float32)

    # Set up ROS
    print('Setting up ROS...')
    # CvBridge to transform ROS Image message to OpenCV image
    self.bridge = CvBridge()

    # Semantic image publisher
    self.sem_img_pub = rospy.Publisher("/semantic_pcl/semantic_image",
                                       Image, queue_size=1)

    # Set up ros image subscriber
    # Set buff_size to average msg size to avoid accumulating delay
    if gen_pcl:
        # Point cloud frame id
        frame_id = rospy.get_param('/semantic_pcl/frame_id')
        # Camera intrinsic matrix
        fx = rospy.get_param('/camera/fx')
        fy = rospy.get_param('/camera/fy')
        cx = rospy.get_param('/camera/cx')
        cy = rospy.get_param('/camera/cy')
        intrinsic = np.matrix([[fx, 0, cx], [0, fy, cy], [0, 0, 1]],
                              dtype=np.float32)

        self.pcl_pub = rospy.Publisher("/semantic_pcl/semantic_pcl",
                                       PointCloud2, queue_size=1)
        self.color_sub = message_filters.Subscriber(
            rospy.get_param('/semantic_pcl/color_image_topic'),
            Image, queue_size=1, buff_size=30 * 480 * 640)
        # increase buffer size to avoid delay (despite queue_size = 1)
        self.depth_sub = message_filters.Subscriber(
            rospy.get_param('/semantic_pcl/depth_image_topic'),
            Image, queue_size=1, buff_size=40 * 480 * 640)
        # Take in one color image and one depth image with a limited time
        # gap between message time stamps
        self.ts = message_filters.ApproximateTimeSynchronizer(
            [self.color_sub, self.depth_sub], queue_size=1, slop=0.3)
        self.ts.registerCallback(self.color_depth_callback)

        self.cloud_generator = ColorPclGenerator(intrinsic, self.img_width,
                                                 self.img_height, frame_id,
                                                 self.point_type)
    else:
        self.image_sub = rospy.Subscriber(
            rospy.get_param('/semantic_pcl/color_image_topic'),
            Image, self.color_callback, queue_size=1,
            buff_size=30 * 480 * 640)
        # self.image_sub = rospy.Subscriber('/kinect2/hd/image_color_rect', Image, self.color_callback, queue_size = 1, buff_size = 30*480*640)

    print('Ready.')

def _setup_cnn_model(self):
    """Load the PSPNet weights for the configured dataset onto the device.

    Sets ``self.device``, ``self.n_classes``, ``self.model``,
    ``self.cnn_input_size``, ``self.mean`` and ``self.cmap``.
    """
    # Set device
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    # Get dataset
    dataset = rospy.get_param('/semantic_pcl/dataset')
    # Setup model
    model_name = 'pspnet'
    model_path = rospy.get_param('/semantic_pcl/model_path')

    if dataset == 'sunrgbd':
        # If use version fine tuned on sunrgbd dataset
        self.n_classes = 38  # Semantic class number
        self.model = get_model(model_name, self.n_classes,
                               version='sunrgbd_res50')
        # This checkpoint is passed to load_state_dict as loaded.
        state = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(state)
        self.cnn_input_size = (321, 321)
    elif dataset == 'ade20k':
        self.n_classes = 150  # Semantic class number
        self.model = get_model(model_name, self.n_classes, version='ade20k')
        state = torch.load(model_path, map_location=self.device)
        # Remove 'module' from dictionary keys
        self.model.load_state_dict(convert_state_dict(state['model_state']))
        self.cnn_input_size = (473, 473)
    else:
        raise ValueError(
            "Unsupported dataset '{}'; expected 'sunrgbd' or 'ade20k'."
            .format(dataset))

    # Mean value of dataset (the same constant for both datasets)
    self.mean = np.array([104.00699, 116.66877, 122.67892])

    self.model = self.model.to(self.device)
    self.model.eval()
    # Color map for semantic classes
    self.cmap = color_map(N=self.n_classes, normalized=False)
def test(args):
    """Run instance segmentation over the Cityscapes validation images.

    For every image the network produces a semantic output and an instance
    embedding. For each class in ``has_inst_class`` the embeddings of that
    class's pixels are clustered with MeanShift (or treated as one instance
    when the estimated bandwidth is small). Every instance of at least
    ``min_inst_size`` pixels is saved as a binary mask plus one line in the
    image's ``.txt`` result file, and a colour overlay is written per class.

    Reads from ``args``: ``dataset``, ``img_norm``, ``model_path``,
    ``origianl_icnet_semantic_pred`` and ``use_gt_sem_map``.

    Fixes over the previous version:
      * the result text file is managed by ``with`` so it is closed even
        when processing an image raises;
      * the MeanShift retry loop catches ``ValueError`` instead of a bare
        ``except:`` (which also swallowed ``KeyboardInterrupt``);
      * the overlay directory is created with ``exist_ok=True`` instead of
        ``try/except: pass``;
      * the prediction is computed once (it used to be computed twice) and
        inference runs under ``torch.no_grad()``.
    """
    imgList = glob.glob(
        'datasets/cityscapes/leftImg8bit/val/*/*_leftImg8bit.png')
    outputDir = 'datasets/cityscapes/results'
    overlayedDir = 'datasets/cityscapes/overlayed_results'
    gtSemDir = 'datasets/cityscapes/gtFine/val'

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None, is_transform=True,
                         img_norm=args.img_norm, test_mode=True)
    n_classes = loader.n_classes

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_name = 'icnet_is'

    if args.origianl_icnet_semantic_pred:
        # Separate ICNet used only for the semantic output.
        model_dict = {"arch": "icnet"}
        model_icnet = get_model(model_dict, n_classes, version=args.dataset)
        state = convert_state_dict(torch.load(
            "pretrained_models/icnetBN_cityscapes_trainval_90k.pth")["model_state"])
        model_icnet.load_state_dict(state)
        model_icnet.eval()
        model_icnet.to(device)

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    model = FullModel(model, None)

    state = convert_state_dict(torch.load(args.model_path)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Instance-extraction settings
    min_inst_size = 500     # instances with fewer pixels are dropped
    single_obj_dist = 1.5   # at or below this bandwidth: one instance
    bd_decay_rate = 0.9     # bandwidth multiplier after a failed MeanShift

    img_processed = 0
    for imgPath in imgList:
        img_processed += 1

        imgId = os.path.split(imgPath)[1].split('.')[0]

        with open(os.path.join(outputDir, imgId + '.txt'), 'w') as output_txt:
            # Setup image
            print("Read Input Image from : {} ({}/{})".format(
                imgPath, img_processed, len(imgList)))

            img = imageio.imread(imgPath)
            original_img = Image.fromarray(img).convert('RGBA')

            img = pad_one_more(img)
            img = img[:, :, ::-1]  # reverse the last (channel) axis
            img = img.astype(np.float64)
            img -= loader.mean
            # NHWC -> NCHW
            img = img.transpose(2, 0, 1)
            img = np.expand_dims(img, 0)
            img = torch.from_numpy(img).float()

            images = img.to(device)
            with torch.no_grad():  # inference only
                if args.origianl_icnet_semantic_pred:
                    outputs = model_icnet(images)
                    _, outputs_inst = model.model(images)
                else:
                    outputs, outputs_inst = model.model(images)

            pred = remove_pad_one_more(
                np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0))

            outputs_inst = outputs_inst.cpu().detach().numpy()
            outputs_sem = outputs.cpu().detach().numpy()

            # Drop the batch axis, move channels last, strip the padding.
            outputs_inst = outputs_inst[0, ...]
            outputs_inst = outputs_inst.transpose((1, 2, 0))
            outputs_inst = remove_pad_one_more(outputs_inst)

            outputs_sem = outputs_sem[0, ...]
            outputs_sem = outputs_sem.transpose((1, 2, 0))
            outputs_sem = remove_pad_one_more(outputs_sem)

            h, w, c = outputs_inst.shape
            pred_flattened = pred.reshape((h * w))
            outputs_inst_flattened = np.copy(
                outputs_inst.reshape((h * w, c)))

            inst_num = 0

            if args.use_gt_sem_map:
                # Replace the predicted semantic map with the ground truth.
                imgId_np = ('_').join(imgId.split('_')[:-1])
                gtImgDir = os.path.join(
                    gtSemDir, imgId.split('_')[0],
                    imgId_np + '_gtFine_labelTrainIds.png')
                pred = imageio.imread(gtImgDir)
                pred_flattened = misc.imresize(
                    pred, (outputs_sem.shape[0], outputs_sem.shape[1])
                ).reshape((h * w))

            for inst_class in has_inst_class:
                interested_semantic_class_train_id = inst_class['trainID']
                predID = inst_class['id']

                # Pixels labelled with this class (constant for the class).
                class_mask = pred_flattened == interested_semantic_class_train_id
                if np.sum(class_mask) == 0:
                    continue

                inst_segment_map = np.zeros((h * w), dtype=np.uint16)
                class_embeddings = outputs_inst_flattened[class_mask, :]
                avg_dist = estimate_bandwidth(class_embeddings, quantile=1.0,
                                              n_samples=1000, n_jobs=12)

                if avg_dist > single_obj_dist:
                    bandwidth = inst_class['bandwidth']
                    while True:
                        try:
                            ms = MeanShift(bandwidth=bandwidth,
                                           bin_seeding=True, n_jobs=12)
                            ms.fit(class_embeddings)
                            clustering_label = ms.labels_
                            break
                        except ValueError:
                            # Clustering failed; retry with a smaller
                            # bandwidth.
                            bandwidth *= bd_decay_rate
                            print(bandwidth)
                    inst_segment_map[class_mask] = clustering_label + 1
                else:
                    inst_segment_map[class_mask] = 1

                for lbl in range(inst_segment_map.max()):
                    if np.sum(inst_segment_map == lbl + 1) < min_inst_size:
                        continue
                    inst_num += 1

                    mask_file_name = imgId + '_inst_{:03d}.png'.format(
                        inst_num)
                    mask_dir = os.path.join(outputDir, mask_file_name)
                    mask_img = np.zeros((h * w), dtype=np.uint8)
                    mask_img[inst_segment_map == lbl + 1] = 255
                    mask_img.resize((h, w))
                    imageio.imsave(mask_dir, mask_img)

                    sem_lbl_pred = predID
                    # Mean class output over the mask, shifted by the
                    # global minimum of the semantic output.
                    conf = np.mean(
                        outputs_sem[..., interested_semantic_class_train_id][
                            mask_img > 0]) - outputs_sem.min()
                    output_txt.write(mask_file_name + ' ' + str(sem_lbl_pred)
                                     + ' {:.4f}\n'.format(conf))

                if inst_num > 0:
                    inst_segment_map = inst_segment_map.reshape(h, w)
                    cmap = plt.cm.jet
                    norm = plt.Normalize(vmin=inst_segment_map.min(),
                                         vmax=inst_segment_map.max())

                    # map the normalized data to colors (RGBA image);
                    # background (label 0) becomes opaque black
                    inst_segment_map_single_image = cmap(
                        norm(inst_segment_map))
                    inst_segment_map_single_image[inst_segment_map == 0] = [
                        0, 0, 0, 1]
                    inst_segment_map_single_image = Image.fromarray(
                        (inst_segment_map_single_image * 255).astype(
                            np.uint8))

                    original_img = original_img.resize(
                        inst_segment_map_single_image.size)
                    inst_segment_map_single_image.putalpha(128)
                    overlayed_image = Image.alpha_composite(
                        original_img, inst_segment_map_single_image)
                    overlayed_image_path = os.path.join(
                        overlayedDir,
                        str(interested_semantic_class_train_id),
                        imgId + '.png')
                    print(overlayed_image_path)
                    os.makedirs(os.path.dirname(overlayed_image_path),
                                exist_ok=True)
                    overlayed_image.save(overlayed_image_path)
def test(args):
    """Segment a single image and save the colour-decoded prediction.

    The image at ``args.img_path`` is preprocessed, pushed through the model
    stored at ``args.model_path`` and the softmax output is arg-maxed into a
    label map, which is decoded with ``loader.decode_segmap`` and written to
    ``args.out_path``.  When ``args.dcrf == "True"`` a dense-CRF refined mask
    is additionally written next to it with a ``_drf.png`` suffix.

    Args:
        args: Namespace providing ``img_path``, ``dataset``, ``model_path``,
            ``out_path`` and ``dcrf`` (compared against the string "True").
    """
    import os  # local import keeps this block self-contained

    # Fall back to the CPU instead of crashing when CUDA is unavailable.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True)
    n_classes = loader.n_classes

    target_size = (loader.img_size[0], loader.img_size[1])
    # RGB copy at network resolution; only consumed by the dense-CRF step.
    resized_img = misc.imresize(img, target_size, interp='bicubic')

    img = img[:, :, ::-1]  # reverse the channel order
    img = img.astype(np.float64)
    img -= loader.mean
    # NOTE(review): the resize happens *after* mean subtraction here, on a
    # float array -- confirm this matches the training-time preprocessing.
    img = misc.imresize(img, target_size)
    img = img.astype(float) / 255.0
    # HWC -> CHW, then add the batch dimension (NCHW)
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    # The architecture name is the checkpoint *file* name up to the first
    # underscore; using the basename keeps directories out of the name.
    model_name = os.path.basename(args.model_path).partition('_')[0]
    model = get_model(model_name, n_classes)
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint['model_state'])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    # no_grad replaces the deprecated Variable(..., volatile=True).
    with torch.no_grad():
        outputs = F.softmax(model(images), dim=1)

    if args.dcrf == "True":
        unary = outputs.cpu().numpy()
        unary = np.squeeze(unary, 0)
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, c = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + '_drf.png'
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    # Per-pixel arg-max over the class dimension, batch dimension dropped.
    pred = np.squeeze(outputs.max(1)[1].cpu().numpy(), axis=0)
    decoded = loader.decode_segmap(pred)
    print('Classes found: ', np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
def test(args):
    """Visualise semantic and instance-embedding predictions for one image.

    Runs the ``icnet_is_wp`` model on ``args.img_path`` and writes:

    * the decoded semantic mask to ``args.out_path``;
    * a 3-component PCA rendering of the instance embedding (``inst_`` prefix);
    * the same rendering restricted to train id 17 (``inst_single_`` prefix);
    * a mean-shift instance map for that class (``inst_seg_map_`` prefix) and
      its overlay on the input image (``inst_seg_map_overlayed_`` prefix).

    Prefixes are applied to the file name of ``args.out_path``, so an output
    path that contains a directory keeps all files in that directory.

    Args:
        args: Namespace providing ``img_path``, ``model_path``, ``dataset``,
            ``img_norm``, ``dcrf``, ``origianl_icnet_semantic_pred`` and
            ``out_path``.
    """

    def _prefixed(prefix):
        """Return ``args.out_path`` with *prefix* prepended to its file name."""
        directory, file_name = os.path.split(args.out_path)
        return os.path.join(directory, prefix + file_name)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]
    # NOTE(review): the name parsed from the checkpoint is overridden here,
    # so this function always builds the 'icnet_is_wp' architecture.
    model_name = 'icnet_is_wp'

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = imageio.imread(args.img_path)
    original_img = Image.fromarray(img).convert('RGBA')

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None,
                         is_transform=True,
                         img_norm=args.img_norm,
                         test_mode=True)
    n_classes = loader.n_classes

    # RGB copy at loader resolution; only consumed by the dense-CRF step.
    resized_img = misc.imresize(img,
                                (loader.img_size[0], loader.img_size[1]),
                                interp="bicubic")

    orig_size = img.shape[:-1]
    if model_name in ["pspnet", "icnet", "icnetBN", "icnet_is", "icnet_is_wp"]:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(
            img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))

    img = img[:, :, ::-1]  # reverse the channel order
    img = img.astype(np.float64)
    img -= loader.mean
    # No /255 scaling is applied here, regardless of args.img_norm.
    # HWC -> CHW, then add the batch dimension (NCHW)
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()
    images = img.to(device)

    if args.origianl_icnet_semantic_pred:
        # Take the semantic prediction from the stock ICNet checkpoint and
        # only the instance embedding from the model under test.
        model_dict = {"arch": "icnet"}
        model = get_model(model_dict, n_classes, version=args.dataset)
        state = convert_state_dict(
            torch.load("pretrained_models/icnetBN_cityscapes_trainval_90k.pth")
            ["model_state"])
        model.load_state_dict(state)
        model.eval()
        model.to(device)
        # Inference only: do not build an autograd graph.
        with torch.no_grad():
            outputs = model(images)

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    model = FullModel(model, None)
    state = convert_state_dict(torch.load(args.model_path)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    with torch.no_grad():
        if args.origianl_icnet_semantic_pred:
            _, outputs_inst = model.model(images)
        else:
            outputs, outputs_inst = model.model(images)

    if args.dcrf:
        # NOTE(review): no softmax is applied to `outputs` in this function,
        # so -log is taken of the raw network output; confirm the model
        # already emits probabilities.
        unary = outputs.data.cpu().numpy()
        unary = np.squeeze(unary, 0)
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, c = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + "_drf.png"
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    # Per-pixel arg-max over the class dimension, batch dimension dropped.
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    # Keep the network-resolution labels; they index the embedding below.
    pred_original = np.copy(pred)
    if model_name in ["pspnet", "icnet", "icnetBN", "icnet_is"]:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, "nearest", mode="F")

    interested_semantic_class_train_id = 17

    # (1, C, H, W) tensor -> (H, W, C) array -> one embedding row per pixel.
    outputs_inst = outputs_inst.cpu().detach().numpy()
    outputs_inst = outputs_inst[0, ...]
    outputs_inst = outputs_inst.transpose((1, 2, 0))
    h, w, c = outputs_inst.shape
    embeddings = outputs_inst.reshape((h * w, c))

    # Whole-image PCA rendering of the embedding, min-max scaled per channel.
    pca = sklearnPCA(n_components=3)
    pca.fit(embeddings)
    projected = pca.transform(embeddings)
    projected -= projected.min(axis=0)
    projected /= projected.max(axis=0)
    outputs_inst_img = projected.reshape((h, w, 3))
    # uint8 (not int64) so the image writer receives a standard pixel type.
    outputs_inst_img = (outputs_inst_img * 255).astype(np.uint8)

    decoded = loader.decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    imageio.imsave(args.out_path, decoded)
    imageio.imsave(_prefixed("inst_"), outputs_inst_img)
    print("Segmentation Mask Saved at: {}".format(args.out_path))

    pred_transformed = pred_original.reshape((h * w))
    class_mask = pred_transformed == interested_semantic_class_train_id
    if not class_mask.any():
        # PCA / mean-shift cannot be fitted on an empty sample set.
        print("Train id {} not found in prediction; skipping instance "
              "visualisation.".format(interested_semantic_class_train_id))
        return
    class_embeddings = embeddings[class_mask, :]

    # PCA fitted on the class pixels only; everything else is blacked out.
    pca.fit(class_embeddings)
    projected_single = pca.transform(embeddings)
    projected_single -= projected_single.min(axis=0)
    projected_single /= projected_single.max(axis=0)
    projected_single[~class_mask, :] = 0
    outputs_inst_single_img = projected_single.reshape((h, w, 3))
    outputs_inst_single_img = Image.fromarray(
        (outputs_inst_single_img * 255).astype(np.uint8))
    outputs_inst_single_img.save(_prefixed("inst_single_"))

    # Mean-shift clustering of the raw class embeddings into instances.
    bandwidth = estimate_bandwidth(class_embeddings,
                                   quantile=0.1,
                                   n_samples=1000,
                                   n_jobs=12)
    print(bandwidth)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, n_jobs=12)
    ms.fit(class_embeddings)
    clustering_label = ms.labels_

    # 0 is background; cluster k becomes instance id k + 1.
    inst_segment_map_single = np.zeros((h * w))
    inst_segment_map_single[class_mask] = clustering_label + 1
    inst_segment_map_single = inst_segment_map_single.reshape(h, w)

    cmap = plt.cm.jet
    norm = plt.Normalize(vmin=inst_segment_map_single.min(),
                         vmax=inst_segment_map_single.max())
    # Map the normalised ids to colours; the result is an RGBA (h, w, 4) array.
    inst_segment_map_single_image = cmap(norm(inst_segment_map_single))
    inst_segment_map_single_image[inst_segment_map_single == 0] = [0, 0, 0, 1]
    inst_segment_map_single_image = Image.fromarray(
        (inst_segment_map_single_image * 255).astype(np.uint8))
    inst_segment_map_single_image.save(_prefixed('inst_seg_map_'))

    # Blend the half-transparent instance map over the input image.
    original_img = original_img.resize(inst_segment_map_single_image.size)
    inst_segment_map_single_image.putalpha(128)
    overlayed_image = Image.alpha_composite(original_img,
                                            inst_segment_map_single_image)
    overlayed_image.save(_prefixed('inst_seg_map_overlayed_'))
def test(args, cfg):
    """Run multi-scale inference on every PNG in a directory.

    Each ``*.png`` under ``args.img_path`` (natural-sorted) is resized to
    every factor in the scale list, pushed through the model, resampled back
    to the original image size and summed.  The summed (not averaged) output
    tensor is saved with ``torch.save`` under the fixed ``test_out/...``
    directory as ``<image stem>_S4_not_1.pt``.

    The accumulator takes its shape from the model output, so any number of
    classes and any image size are supported.

    Args:
        args: Namespace providing ``model_path``, ``img_path``, ``dataset``
            and ``img_norm``.
        cfg: Configuration mapping; ``cfg['model']`` selects the architecture
            and the whole mapping is forwarded to ``get_data_path``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    img_dir = Path(args.img_path)
    img_files = natsort.natsorted(list(img_dir.glob("*.png")),
                                  alg=natsort.PATH)
    img_paths = [str(img_file) for img_file in img_files]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset, config_file=cfg)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Scale 1 is deliberately absent (cf. the "_not_1" file suffix).
    scale_list = [1.5, 1.25, 0.75, 0.5]

    out_dir = ("test_out/mv3_1_true_2_res50_data10_MS/"
               "mv3_1_true_2_res50_data10_MS_7/")
    # torch.save does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    for img_path in tqdm(img_paths):
        img_input = misc.imread(img_path)
        ori_size = tuple(img_input.shape[:2])  # (height, width)

        multi_sum = None
        # Inference only: without no_grad the autograd graph of every scale
        # would be kept alive through the running sum.
        with torch.no_grad():
            for scale in scale_list:
                if scale != 1:
                    img = cv.resize(img_input,
                                    dsize=(0, 0),
                                    fx=scale,
                                    fy=scale,
                                    interpolation=cv.INTER_LINEAR)
                else:
                    img = img_input

                # Mean subtraction is not applied in this variant.
                img = img.astype(np.float64)
                if args.img_norm:
                    img = img.astype(float) / 255.0

                # HWC -> CHW, then add the batch dimension (NCHW)
                img = img.transpose(2, 0, 1)
                img = np.expand_dims(img, 0)
                img = torch.from_numpy(img).float()

                images = img.to(device)
                outputs = model(images)
                # bilinear is ok for both upsample and downsample
                if scale != 1:
                    outputs = F.interpolate(outputs,
                                            ori_size,
                                            mode='bilinear',
                                            align_corners=False)

                multi_sum = outputs if multi_sum is None else multi_sum + outputs

        out_path = out_dir + Path(img_path).stem + "_S4_not_1.pt"
        torch.save(multi_sum, out_path)