def __call__(self, img, gt, hha, depth, coord, camera_params):
    """Augment one RGB-D training sample and pack it for the network.

    Steps, in order: random mirroring, optional random rescaling (only when
    ``config.train_scale_array`` is set), normalisation of the RGB image and
    of the depth map, then one shared random crop position applied to every
    modality.

    Args:
        img: RGB image; returned transposed with ``(2, 0, 1)``.
        gt: Label map; 255 is passed as its pad value.
        hha: HHA image; cropped and transposed, never normalised here.
        depth: Depth map; returned with a new leading axis.
        coord: Coordinate map; -1 is passed as its pad value.
        camera_params: Dict that is updated **in place** with a ``'scale'``
            tensor when random scaling is enabled.

    Returns:
        ``(p_img, p_gt, extra_dict)``; ``extra_dict`` carries the keys
        ``'hha_img'``, ``'depth_img'``, ``'coord_img'`` and
        ``'camera_params'``.
    """
    # NOTE(review): hha is not handed to random_mirror, so presumably it is
    # not flipped together with the other modalities -- confirm intended.
    img, gt, depth, coord = random_mirror(img, gt, depth, coord)

    if config.train_scale_array is not None:
        img, gt, hha, depth, coord, scale = random_scale(
            img, gt, hha, depth, coord, config.train_scale_array)
        scale_np = np.array(scale, dtype=np.float32)
        camera_params['scale'] = torch.from_numpy(scale_np).float()

    img = normalize(img, self.img_mean, self.img_std)
    # self.depth_var is passed in the slot where the RGB call passes a std.
    depth = normalize(depth, self.depth_mean, self.depth_var)

    target_shape = (config.image_height, config.image_width)
    origin = generate_random_crop_pos(img.shape[:2], target_shape)

    # Every modality is cropped at the same position so they stay aligned.
    p_img, _ = random_crop_pad_to_shape(img, origin, target_shape, 0)
    p_gt, _ = random_crop_pad_to_shape(gt, origin, target_shape, 255)
    p_hha, _ = random_crop_pad_to_shape(hha, origin, target_shape, 0)
    depth, _ = random_crop_pad_to_shape(depth, origin, target_shape, 0)
    coord, _ = random_crop_pad_to_shape(coord, origin, target_shape, -1)

    extra_dict = {
        'hha_img': p_hha.transpose(2, 0, 1),
        'depth_img': depth[np.newaxis, ...],
        'coord_img': coord.transpose(2, 0, 1),
        'camera_params': camera_params,
    }
    return p_img.transpose(2, 0, 1), p_gt, extra_dict
def __call__(self, ref_img, cur_img, ref_mask, cur_mask):
    """Prepare a (reference frame, current frame, current mask) triple.

    Both frames and the current mask are cut to a random bounding box
    obtained from ``generate_random_common_bbox``, resized to
    ``(config.image_width, config.image_height)``, randomly h-flipped,
    normalised (images only) and randomly rotated.

    ``ref_mask`` is used only to compute the bounding box; it is not returned.

    Returns:
        ``(ref_img, cur_img, cur_mask, None)`` with both images transposed
        with ``(2, 0, 1)`` and the mask given a new leading axis.
    """
    bbox = generate_random_common_bbox(ref_mask, cur_mask)
    # bbox is indexed as (x0, y0, x1, y1): entries 1/3 slice rows, 0/2 columns.
    rows = slice(bbox[1], bbox[3])
    cols = slice(bbox[0], bbox[2])
    ref_img = ref_img[rows, cols, :]
    cur_img = cur_img[rows, cols, :]
    cur_mask = cur_mask[rows, cols]

    out_size = (config.image_width, config.image_height)
    ref_img = cv2.resize(ref_img, out_size)
    cur_img = cv2.resize(cur_img, out_size)
    # Nearest-neighbour keeps the mask values discrete.
    cur_mask = cv2.resize(cur_mask, out_size, interpolation=cv2.INTER_NEAREST)

    ref_img, cur_img, cur_mask = random_hflip_adnet(ref_img, cur_img, cur_mask)

    ref_img = normalize(ref_img, self.img_mean, self.img_std)
    cur_img = normalize(cur_img, self.img_mean, self.img_std)

    ref_img, cur_img, cur_mask = random_rotation_adnet(
        ref_img, cur_img, cur_mask)

    return (ref_img.transpose(2, 0, 1),
            cur_img.transpose(2, 0, 1),
            np.expand_dims(cur_mask, 0),
            None)
def __call__(self, img, hha):
    """Normalise an RGB/HHA pair and transpose both with ``(2, 0, 1)``.

    Both inputs are normalised with the *image* statistics
    (``self.img_mean`` / ``self.img_std``).

    Returns:
        ``(p_img, extra_dict)`` where ``extra_dict`` is ``{'hha_img': p_hha}``.
    """
    mean, std = self.img_mean, self.img_std
    p_img = normalize(img, mean, std).transpose(2, 0, 1)
    p_hha = normalize(hha, mean, std).transpose(2, 0, 1)
    return p_img, {'hha_img': p_hha}
def __call__(self, img, gt):
    """Mirror, rescale by a random short side, crop and down-sample labels.

    The short side is drawn uniformly (``random.randint``) between 0.5x and
    2.0x ``config.base_size``; the long side follows the aspect ratio.
    The image is resized bilinearly, the labels with nearest-neighbour.

    Returns:
        ``(p_img, p_gt, None)``; ``p_img`` is transposed with ``(2, 0, 1)``
        and ``p_gt`` is resized by ``1 / config.gt_down_sampling``.
    """
    img, gt = random_mirror(img, gt)

    lower = int(config.base_size * 0.5)
    upper = int(config.base_size * 2.0)
    short_size = random.randint(lower, upper)

    h, w, _ = img.shape
    if h > w:
        # Taller than wide: the width is the short side.
        ow = short_size
        oh = int(1.0 * h * ow / w)
    else:
        oh = short_size
        ow = int(1.0 * w * oh / h)
    resized_wh = (ow, oh)

    img = cv2.resize(img, resized_wh, interpolation=cv2.INTER_LINEAR)
    gt = cv2.resize(gt, resized_wh, interpolation=cv2.INTER_NEAREST)

    img = normalize(img, self.img_mean, self.img_std)

    target_shape = (config.image_height, config.image_width)
    origin = generate_random_crop_pos(img.shape[:2], target_shape)
    p_img, _ = random_crop_pad_to_shape(img, origin, target_shape, 0)
    p_gt, _ = random_crop_pad_to_shape(gt, origin, target_shape, 255)

    label_wh = (config.image_width // config.gt_down_sampling,
                config.image_height // config.gt_down_sampling)
    p_gt = cv2.resize(p_gt, label_wh, interpolation=cv2.INTER_NEAREST)

    return p_img.transpose(2, 0, 1), p_gt, None
def __call__(self, img, gt):
    """Augment a sample and derive an auxiliary edge label from the labels.

    After mirroring and optional random scaling, a copy of ``gt`` with its
    255-valued pixels zeroed is passed through Canny and dilated with
    ``self.edge_kernel``. Edge pixels are then rewritten from 255 to 1.
    Image, labels and edge map share the same random crop position.

    Returns:
        ``(p_img, p_gt, {'aux_label': p_cgt})`` with ``p_img`` transposed
        with ``(2, 0, 1)``.
    """
    img, gt = random_mirror(img, gt)
    if config.train_scale_array is not None:
        img, gt, _ = random_scale(img, gt, config.train_scale_array)

    # Work on a copy so gt keeps its 255 entries.
    edge_source = np.array(gt)
    edge_source[np.where(gt == 255)] = 0
    edges = cv2.Canny(edge_source, 5, 5, apertureSize=7)
    edges = cv2.dilate(edges, self.edge_kernel)
    edges[edges == 255] = 1

    img = normalize(img, self.img_mean, self.img_std)

    target_shape = (config.image_height, config.image_width)
    origin = generate_random_crop_pos(img.shape[:2], target_shape)
    p_img, _ = random_crop_pad_to_shape(img, origin, target_shape, 0)
    p_gt, _ = random_crop_pad_to_shape(gt, origin, target_shape, 255)
    # 255 is passed as the pad value for the edge map as well.
    p_cgt, _ = random_crop_pad_to_shape(edges, origin, target_shape, 255)

    return p_img.transpose(2, 0, 1), p_gt, {'aux_label': p_cgt}
def __call__(self, img, gt):
    """Mirror, optionally rescale, normalise and randomly crop one sample.

    0 is passed as the pad value for both the image and the labels. The
    labels are returned as cropped: no down-scaling and no offset is applied.

    Returns:
        ``(p_img, p_gt, None)`` with ``p_img`` transposed with ``(2, 0, 1)``.
    """
    img, gt = random_mirror(img, gt)

    scales = config.train_scale_array
    if scales is not None:
        img, gt, _ = random_scale(img, gt, scales)

    img = normalize(img, self.img_mean, self.img_std)

    target_shape = (config.image_height, config.image_width)
    origin = generate_random_crop_pos(img.shape[:2], target_shape)
    cropped_img, _ = random_crop_pad_to_shape(img, origin, target_shape, 0)
    cropped_gt, _ = random_crop_pad_to_shape(gt, origin, target_shape, 0)

    return cropped_img.transpose(2, 0, 1), cropped_gt, None
def process_image_rgbd_coord(self, img, hha, depth, coord, crop_size=None):
    """Prepare an RGB / HHA / depth / coordinate sample for inference.

    The RGB image is expanded to three channels when it has fewer, then
    normalised with ``self.image_mean`` / ``self.image_std``. HHA, depth and
    coord are not normalised here. When ``crop_size`` is given, every input
    is passed through ``pad_image_to_shape`` with a constant border of 0.

    Args:
        img: RGB (or fewer-channel) image indexed as ``img.shape[2]``.
        hha: HHA image; returned transposed with ``(2, 0, 1)``.
        depth: Depth map; returned with a new leading axis.
        coord: Coordinate map; returned transposed with ``(2, 0, 1)``.
        crop_size: Optional target shape forwarded to ``pad_image_to_shape``.

    Returns:
        ``(p_img, p_hha, p_depth, p_coord, margin)``. ``margin`` is the value
        reported by ``pad_image_to_shape`` for the last padded input
        (``coord``), or ``None`` when ``crop_size`` is ``None``.
    """
    p_img = img
    p_hha = hha
    p_depth = depth
    p_coord = coord

    # Fix: margin was only assigned inside the crop_size branch, so calling
    # with the default crop_size=None raised UnboundLocalError at the return.
    margin = None

    if img.shape[2] < 3:
        # Replicate the channel axis three times to obtain a 3-channel image.
        p_img = np.concatenate((p_img, p_img, p_img), axis=2)

    p_img = normalize(p_img, self.image_mean, self.image_std)

    if crop_size is not None:
        p_img, margin = pad_image_to_shape(p_img, crop_size,
                                           cv2.BORDER_CONSTANT, value=0)
        p_hha, margin = pad_image_to_shape(p_hha, crop_size,
                                           cv2.BORDER_CONSTANT, value=0)
        p_depth, margin = pad_image_to_shape(p_depth, crop_size,
                                             cv2.BORDER_CONSTANT, value=0)
        p_coord, margin = pad_image_to_shape(p_coord, crop_size,
                                             cv2.BORDER_CONSTANT, value=0)

    p_img = p_img.transpose(2, 0, 1)
    p_hha = p_hha.transpose(2, 0, 1)
    p_depth = p_depth[np.newaxis, ...]
    p_coord = p_coord.transpose(2, 0, 1)

    return p_img, p_hha, p_depth, p_coord, margin
def __call__(self, img, gt, edge, midline):
    """Augment an image with its gt / edge / midline maps (no cropping).

    All four inputs are mirrored together; the three label maps go through
    ``img_to_black``; all are optionally rescaled; the image is normalised;
    finally everything is resized with nearest-neighbour interpolation to
    ``(image_width, image_height) // gt_down_sampling``.

    Returns:
        ``(p_img, p_gt, None, p_edge, p_midline)`` with ``p_img`` transposed
        with ``(2, 0, 1)``.
    """
    img, gt, edge, midline = random_mirror(img, gt, edge, midline)

    gt = img_to_black(gt)
    edge = img_to_black(edge)
    midline = img_to_black(midline)

    if config.train_scale_array is not None:
        img, gt, _, edge, midline = random_scale(
            img, gt, config.train_scale_array, edge, midline)

    img = normalize(img, self.img_mean, self.img_std)

    out_wh = (config.image_width // config.gt_down_sampling,
              config.image_height // config.gt_down_sampling)
    # Note: the image itself is also resized with INTER_NEAREST.
    p_img = cv2.resize(img, out_wh, interpolation=cv2.INTER_NEAREST)
    p_gt = cv2.resize(gt, out_wh, interpolation=cv2.INTER_NEAREST)
    p_edge = cv2.resize(edge, out_wh, interpolation=cv2.INTER_NEAREST)
    p_midline = cv2.resize(midline, out_wh, interpolation=cv2.INTER_NEAREST)

    p_img = p_img.transpose(2, 0, 1)

    # Diagnostic output: dump the labels if any value above 1 is present.
    if p_gt.max() > 1:
        print(p_gt)

    return p_img, p_gt, None, p_edge, p_midline
def __call__(self, img, gt, edge, midline):
    """Augment an image with its gt / edge / midline maps via a 200x200 crop.

    Pipeline: joint random mirror, ``img_to_black`` on the three label maps,
    optional random scaling, image normalisation, one shared random 200x200
    crop, then a nearest-neighbour resize of everything to
    ``(image_width, image_height) // gt_down_sampling``.

    Returns:
        ``(p_img, p_gt, None, p_edge, p_midline)`` with ``p_img`` transposed
        with ``(2, 0, 1)``.
    """
    img, gt, edge, midline = random_mirror(img, gt, edge, midline)

    gt = img_to_black(gt)
    edge = img_to_black(edge)
    midline = img_to_black(midline)

    if config.train_scale_array is not None:
        img, gt, _, edge, midline = random_scale(
            img, gt, config.train_scale_array, edge, midline)

    img = normalize(img, self.img_mean, self.img_std)

    patch_shape = (200, 200)
    origin = generate_random_crop_pos(img.shape[:2], patch_shape)
    p_img, _ = random_crop_pad_to_shape(img, origin, patch_shape, 0)
    # -1 is passed as the pad value for all three label maps.
    p_gt, _ = random_crop_pad_to_shape(gt, origin, patch_shape, -1)
    p_edge, _ = random_crop_pad_to_shape(edge, origin, patch_shape, -1)
    p_midline, _ = random_crop_pad_to_shape(midline, origin, patch_shape, -1)

    out_wh = (config.image_width // config.gt_down_sampling,
              config.image_height // config.gt_down_sampling)
    p_img = cv2.resize(p_img, out_wh, interpolation=cv2.INTER_NEAREST)
    p_gt = cv2.resize(p_gt, out_wh, interpolation=cv2.INTER_NEAREST)
    p_edge = cv2.resize(p_edge, out_wh, interpolation=cv2.INTER_NEAREST)
    p_midline = cv2.resize(p_midline, out_wh, interpolation=cv2.INTER_NEAREST)

    p_img = p_img.transpose(2, 0, 1)

    # Diagnostic output: dump the labels if any value above 1 is present.
    if p_gt.max() > 1:
        print(p_gt)

    return p_img, p_gt, None, p_edge, p_midline
def process_image_rgbd(self, img, disp, crop_size=None):
    """Prepare an RGB image and its disparity/depth companion for inference.

    The RGB image is expanded to three channels when it has fewer and is
    normalised with ``self.image_mean`` / ``self.image_std``. A 2-D ``disp``
    is normalised with mean 0 / std 1 and gets a new leading axis; any other
    ``disp`` is normalised with the image statistics and transposed with
    ``(2, 0, 1)``.

    Returns:
        ``(p_img, p_disp, margin)`` when ``crop_size`` is given (``margin``
        comes from padding the RGB image), otherwise ``(p_img, p_disp)``.
    """
    disp_is_2d = len(disp.shape) == 2
    pad_requested = crop_size is not None

    p_img = img
    if img.shape[2] < 3:
        p_img = np.concatenate((p_img, p_img, p_img), axis=2)
    p_img = normalize(p_img, self.image_mean, self.image_std)

    if disp_is_2d:
        p_disp = normalize(disp, 0, 1)
    else:
        p_disp = normalize(disp, self.image_mean, self.image_std)

    if pad_requested:
        p_img, margin = pad_image_to_shape(p_img, crop_size,
                                           cv2.BORDER_CONSTANT, value=0)
        p_disp, _ = pad_image_to_shape(p_disp, crop_size,
                                       cv2.BORDER_CONSTANT, value=0)

    p_img = p_img.transpose(2, 0, 1)
    p_disp = p_disp[np.newaxis, ...] if disp_is_2d else p_disp.transpose(2, 0, 1)

    if pad_requested:
        return p_img, p_disp, margin
    return p_img, p_disp
def pre_processing(img_path):
    """Load an image, resize it to ``size`` and return a normalised tensor.

    ``size`` is read from the enclosing scope. The tensor produced by
    ``transforms.ToTensor`` is normalised in place with fixed per-channel
    mean/std values.
    """
    channel_mean = (0.408, 0.447, 0.47)
    channel_std = (0.289, 0.274, 0.278)

    resized = cv2.resize(read_image(img_path), size)
    tensor = transforms.ToTensor()(Image.fromarray(resized))
    return normalize(tensor, channel_mean, channel_std, inplace=True)
def __getitem__(self, index):
    """Load one sample without augmentation.

    Args:
        index: Position in ``self.files``; each entry is a mapping with the
            keys ``"img"``, ``"label"`` and ``"name"``.

    Returns:
        ``(image, label, size, name)``: the normalised image transposed with
        ``(2, 0, 1)``, the grayscale label map, the shape of the image as
        loaded (as an array), and the sample name.

    Raises:
        OSError: If the image or the label file cannot be read.
    """
    datafiles = self.files[index]

    # cv2.imread signals failure by returning None rather than raising;
    # check explicitly so the error names the offending file.
    image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
    if image is None:
        raise OSError("Failed to read image '{}'.".format(datafiles["img"]))
    label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE)
    if label is None:
        raise OSError("Failed to read label '{}'.".format(datafiles["label"]))

    size = image.shape  # shape before the transpose below
    name = datafiles["name"]

    image = normalize(image, np.array(self.mean), np.array(self.std))
    image = image.transpose(2, 0, 1)

    return image.copy(), label.copy(), np.array(size), name
def __call__(self, img, gt):
    """Apply the standard train-time augmentation to an image/label pair.

    Random mirror, optional random scaling (``config.train_scale_array``),
    image normalisation and a shared random crop of size
    ``(config.image_height, config.image_width)``. 0 is passed as the pad
    value for both image and labels.

    Returns:
        ``(p_img, p_gt, None)`` with ``p_img`` transposed with ``(2, 0, 1)``.
    """
    img, gt = random_mirror(img, gt)

    if config.train_scale_array is not None:
        img, gt, _ = random_scale(img, gt, config.train_scale_array)

    img = normalize(img, self.img_mean, self.img_std)

    window = (config.image_height, config.image_width)
    anchor = generate_random_crop_pos(img.shape[:2], window)
    p_img, _ = random_crop_pad_to_shape(img, anchor, window, 0)
    p_gt, _ = random_crop_pad_to_shape(gt, anchor, window, 0)

    extra_dict = None
    return p_img.transpose(2, 0, 1), p_gt, extra_dict
def __call__(self, img, gt):
    """Augment an image/label pair using the instance-level ``self.config``.

    Random mirror, optional random scaling, image normalisation and a shared
    random crop (255 is passed as the label pad value). The cropped labels
    are then resized with nearest-neighbour interpolation to
    ``(image_width, image_height) // gt_down_sampling``.

    Returns:
        ``(p_img, p_gt, None)`` with ``p_img`` transposed with ``(2, 0, 1)``.
    """
    cfg = self.config

    img, gt = random_mirror(img, gt)
    if cfg.train_scale_array is not None:
        img, gt, _ = random_scale(img, gt, cfg.train_scale_array)

    img = normalize(img, self.img_mean, self.img_std)

    window = (cfg.image_height, cfg.image_width)
    anchor = generate_random_crop_pos(img.shape[:2], window)
    p_img, _ = random_crop_pad_to_shape(img, anchor, window, 0)
    p_gt, _ = random_crop_pad_to_shape(gt, anchor, window, 255)

    label_wh = (cfg.image_width // cfg.gt_down_sampling,
                cfg.image_height // cfg.gt_down_sampling)
    p_gt = cv2.resize(p_gt, label_wh, interpolation=cv2.INTER_NEAREST)

    return p_img.transpose(2, 0, 1), p_gt, None
def __call__(self, img, gt):
    """Augment an image/label pair without cropping.

    Random mirror, ``img_to_black`` on the labels, optional random scaling,
    image normalisation, then a nearest-neighbour resize of both image and
    labels to ``(image_width, image_height) // gt_down_sampling``.

    Returns:
        ``(p_img, p_gt, None)`` with ``p_img`` transposed with ``(2, 0, 1)``.
    """
    img, gt = random_mirror(img, gt)
    gt = img_to_black(gt)

    if config.train_scale_array is not None:
        img, gt, _ = random_scale(img, gt, config.train_scale_array)

    img = normalize(img, self.img_mean, self.img_std)

    out_wh = (config.image_width // config.gt_down_sampling,
              config.image_height // config.gt_down_sampling)
    p_img = cv2.resize(img, out_wh, interpolation=cv2.INTER_NEAREST)
    p_gt = cv2.resize(gt, out_wh, interpolation=cv2.INTER_NEAREST)

    return p_img.transpose(2, 0, 1), p_gt, None
def process_image(self, img, crop_size=None):
    """Normalise an image for inference, optionally padding it.

    An image with fewer than three channels is replicated along axis 2
    before normalisation with ``self.image_mean`` / ``self.image_std``.

    Returns:
        ``(p_img, margin)`` when ``crop_size`` is given, otherwise just
        ``p_img``. ``p_img`` is transposed with ``(2, 0, 1)`` in both cases.
    """
    p_img = img
    if img.shape[2] < 3:
        p_img = np.concatenate((p_img, p_img, p_img), axis=2)

    p_img = normalize(p_img, self.image_mean, self.image_std)

    if crop_size is None:
        return p_img.transpose(2, 0, 1)

    p_img, margin = pad_image_to_shape(p_img, crop_size,
                                       cv2.BORDER_CONSTANT, value=0)
    return p_img.transpose(2, 0, 1), margin
def __call__(self, img, gt):
    """Augment an image/mask pair with ``self.augmenter`` and normalise.

    ``self.augmenter`` is called with ``image=`` / ``mask=`` keywords and is
    indexed with ``'image'`` / ``'mask'``. No manual mirror / scale / crop
    steps are applied here.

    Returns:
        ``(p_img, p_gt, None)`` with ``p_img`` normalised and transposed with
        ``(2, 0, 1)``.
    """
    augmented = self.augmenter(image=img, mask=gt)
    aug_img = augmented['image']
    p_gt = augmented['mask']

    p_img = normalize(aug_img, self.img_mean, self.img_std).transpose(2, 0, 1)

    return p_img, p_gt, None
def __call__(self, img, gt):
    """Augment an image/label pair via a random 200x200 crop.

    Random mirror, ``img_to_black`` on the labels, optional random scaling,
    image normalisation, a shared random 200x200 crop (255 is passed as the
    label pad value), then a nearest-neighbour resize of both to
    ``(image_width, image_height) // gt_down_sampling``.

    Returns:
        ``(p_img, p_gt, None)`` with ``p_img`` transposed with ``(2, 0, 1)``.
    """
    img, gt = random_mirror(img, gt)
    gt = img_to_black(gt)

    if config.train_scale_array is not None:
        img, gt, _ = random_scale(img, gt, config.train_scale_array)

    img = normalize(img, self.img_mean, self.img_std)

    patch_shape = (200, 200)
    origin = generate_random_crop_pos(img.shape[:2], patch_shape)
    p_img, _ = random_crop_pad_to_shape(img, origin, patch_shape, 0)
    p_gt, _ = random_crop_pad_to_shape(gt, origin, patch_shape, 255)

    out_wh = (config.image_width // config.gt_down_sampling,
              config.image_height // config.gt_down_sampling)
    p_img = cv2.resize(p_img, out_wh, interpolation=cv2.INTER_NEAREST)
    p_gt = cv2.resize(p_gt, out_wh, interpolation=cv2.INTER_NEAREST)

    return p_img.transpose(2, 0, 1), p_gt, None
def __getitem__(self, index):
    """Load one training sample and apply random augmentation.

    The image is normalised, then image and label are randomly mirrored,
    optionally rescaled (when ``self.scale`` is truthy) and cropped at one
    shared random position to ``self.crop_size``.

    Args:
        index: Position in ``self.files``; each entry is a mapping with the
            keys ``"img"``, ``"label"`` and ``"name"``.

    Returns:
        ``(image, label, size, name)``: the augmented image transposed with
        ``(2, 0, 1)``, the augmented label map, the shape of the image as
        loaded (as an array), and the sample name.

    Raises:
        OSError: If the image or the label file cannot be read.
    """
    datafiles = self.files[index]

    # cv2.imread signals failure by returning None rather than raising;
    # check explicitly so the error names the offending file.
    image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
    if image is None:
        raise OSError("Failed to read image '{}'.".format(datafiles["img"]))
    label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE)
    if label is None:
        raise OSError("Failed to read label '{}'.".format(datafiles["label"]))

    size = image.shape  # shape before any augmentation
    name = datafiles["name"]

    image = normalize(image, np.array(self.mean), np.array(self.std))
    image, label = random_mirror(image, label)

    if self.scale:
        image, label, _ = random_scale(
            image, label, [0.75, 1, 1.25, 1.5, 1.75, 2.0])

    crop_pos = generate_random_crop_pos(image.shape[:2], self.crop_size)
    image, _ = random_crop_pad_to_shape(image, crop_pos, self.crop_size, 0)
    # 255 is passed as the pad value for the label map.
    label, _ = random_crop_pad_to_shape(label, crop_pos, self.crop_size, 255)

    image = image.transpose(2, 0, 1)

    return image.copy(), label.copy(), np.array(size), name
def predict(models: nn.ModuleList, img_path, path2save, thresh=0.5):
    """Perform prediction for a single image, then save and show the overlay.

    The sigmoid outputs of all models are averaged (with a single model the
    average is that model's output), thresholded into a binary mask, and the
    mask is overlaid on the transformed source image.

    Params:
        models    : NN models (one model, or several to average)
        img_path  : path to an image
        path2save : file path the overlaid image is written to
        thresh    : prediction threshold applied to the mean probability

    Raises:
        FileNotFoundError : if ``img_path`` does not exist
        ValueError        : if ``models`` is empty or the file is not a
                            readable image
    """
    img_path = Path(img_path)
    if not img_path.exists():
        raise FileNotFoundError("File '{}' not found.".format(str(img_path)))
    if len(models) == 0:
        # Previously an empty list averaged to NaN and produced an empty mask.
        raise ValueError("At least one model is required.")

    src_img = cv2.imread(str(img_path))
    if src_img is None:
        # cv2.imread returns None (no exception) for undecodable files.
        raise ValueError(
            "File '{}' could not be read as an image.".format(str(img_path)))

    transform = test_trasformations()
    src_img = transform(image=src_img)["image"]

    img2predict = cv2.cvtColor(src_img.copy(),
                               cv2.COLOR_BGR2RGB).astype(dtype=np.float32)
    img2predict = normalize(img2predict)
    img2predict = utils.to_gpu(
        numpy_to_tensor(img2predict).unsqueeze(0).contiguous()).float()

    # Accumulate probabilities over all models with autograd disabled.
    # torch.sigmoid computes the same values as F.sigmoid, which emits a
    # deprecation warning on some PyTorch versions.
    prob_sum = None
    with torch.no_grad():
        for model in models:
            model.eval()
            probs = torch.sigmoid(model(img2predict))
            prob_sum = probs if prob_sum is None else prob_sum + probs

    mean_probs = (prob_sum / len(models)).squeeze(0).squeeze(0).float()
    mask = (mean_probs > thresh).cpu().numpy().astype(dtype=np.uint8)
    overlayed_img = alpha_overlay(src_img, mask)

    # save (str() so that pathlib.Path destinations are accepted as well)
    cv2.imwrite(str(path2save), overlayed_img)

    # show; blocks until a key is pressed
    cv2.imshow("Predicted", overlayed_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    print("Image '{}' was processed successfully.".format(str(img_path)))
def predict_batch(models: nn.ModuleList, path2images, path2save, thresh=0.5):
    """Perform prediction for every image in a directory.

    For each readable image the sigmoid outputs of all models are averaged
    (with a single model the average is that model's output), thresholded
    into a binary mask, overlaid on the transformed source image and written
    to ``path2save`` under the original file name. Entries that are not
    files, or that OpenCV cannot decode, are skipped with a message.

    Params:
        models      : NN models (one model, or several to average)
        path2images : directory containing the images
        path2save   : should be a dir
        thresh      : prediction threshold applied to the mean probability

    Raises:
        RuntimeError : if ``path2images`` or ``path2save`` is not a directory
        ValueError   : if ``models`` is empty
    """
    path2images = Path(path2images)
    path2save = Path(path2save)

    if not path2images.is_dir():
        raise RuntimeError("File '{}' is not dir.".format(str(path2images)))
    if not path2save.is_dir():
        raise RuntimeError("File '{}' is not dir.".format(str(path2save)))
    if len(models) == 0:
        # Previously an empty list averaged to NaN and produced empty masks.
        raise ValueError("At least one model is required.")

    for model in models:
        model.eval()

    count_processed = 0
    for ip in sorted(path2images.glob("*")):
        # glob("*") also yields sub-directories and non-image files;
        # cv2.imread returns None for anything it cannot decode.
        src_img = cv2.imread(str(ip)) if ip.is_file() else None
        if src_img is None:
            print("Skipping '{}': not a readable image.".format(str(ip)))
            continue

        transform = test_trasformations()
        src_img = transform(image=src_img)["image"]

        img2predict = cv2.cvtColor(src_img.copy(),
                                   cv2.COLOR_BGR2RGB).astype(dtype=np.float32)
        img2predict = normalize(img2predict)
        img2predict = utils.to_gpu(
            numpy_to_tensor(img2predict).unsqueeze(0).contiguous()).float()

        # Accumulate probabilities over all models with autograd disabled.
        # torch.sigmoid computes the same values as F.sigmoid, which emits a
        # deprecation warning on some PyTorch versions.
        prob_sum = None
        with torch.no_grad():
            for model in models:
                probs = torch.sigmoid(model(img2predict))
                prob_sum = probs if prob_sum is None else prob_sum + probs

        mean_probs = (prob_sum / len(models)).squeeze(0).squeeze(0).float()
        mask = (mean_probs > thresh).cpu().numpy().astype(dtype=np.uint8)
        overlayed_img = alpha_overlay(src_img, mask)

        # save
        cv2.imwrite(str(path2save / "{}".format(ip.name)), overlayed_img)

        print("Image '{}' was processed successfully.".format(str(ip)))
        count_processed += 1

    print("{} images were processed.".format(count_processed))
# main(): evaluation entry point for the video segmentation models.
#
# What the code below does, in order:
#   1. Builds the network selected by args.model ('base', 'intra', 'inter',
#      'concat' or 'ad'), loads the 'model' entry of
#      './log/snapshot/epoch-last.pth', and switches it to eval / float / CUDA.
#   2. Reads the video names from <args.data_dir>/ImageSets/2016/val.txt; when
#      args.video is set and present in that list, only that video is used.
#   3. Per video: the first JPEG is the reference frame and the first PNG the
#      reference mask (its size H x W is the interpolation target). Frames are
#      resized to every entry of resize_shape, normalised with
#      config.image_mean / config.image_std and, when args.ms_mirror is set,
#      duplicated as horizontally flipped copies. Predictions are interpolated
#      to (H, W), flipped back for the mirrored half, and averaged.
#   4. Optional args.inst_prune post-processing, then get_iou against the
#      current annotation; only frames with 0 < f < len(ann_files) - 1 (i.e.
#      not the first and not the last) contribute to the per-video mean IoU.
#   5. Optional outputs: args.visualize (overlay shown and written under
#      'visualization/'), args.save_mask (binary masks under
#      args.save_mask_dir), args.eval_sal (per-video pickle of predictions and
#      ground truth, followed by MAE / F_max and 'davis.mat').
#
# NOTE(review): this definition has lost its line breaks and indentation, so
#   the nesting of the inst_prune / mask-pruning branches cannot be recovered
#   from this text. Restore the formatting from version control before
#   changing any logic here.
# NOTE(review): `start` is read in `print(end-start, 'seconds')` but is never
#   assigned in this function -- presumably a module-level timer; confirm.
# NOTE(review): `model` stays unbound when args.model matches none of the five
#   branches.
# NOTE(review): sum(curr_video_iou_list)/len(curr_video_iou_list) divides by
#   zero for a video with fewer than three frame/annotation pairs; the same
#   holds for the overall mean when `videos` is empty.
# NOTE(review): n_sample = len(precision)//1000 is 0 when the PR curve has
#   fewer than 1000 points, and a slice step of 0 raises ValueError.
# NOTE(review): the val.txt file object is opened inline and never explicitly
#   closed; file names are split on '/', which assumes POSIX-style paths.
# NOTE(review): pickle.load is applied to every '*.pkl' matching
#   args.save_heatmap_dir -- only point it at a trusted directory.
def main(): cudnn.enabled = True if args.model == 'base': model = ResNetDeepLabv3(backbone=args.backbone) elif args.model == 'intra': model = IntraFrameNet(backbone=args.backbone, pyramid_pooling=args.pyramid_pooling, embedding=args.embedding_size, batch_mode='sync') elif args.model == 'inter': model = InterFrameNet(backbone=args.backbone, pyramid_pooling=args.pyramid_pooling, embedding=args.embedding_size, batch_mode='sync') elif args.model == 'concat': model = ConcatNet(backbone=args.backbone, pyramid_pooling=args.pyramid_pooling, embedding=args.embedding_size, batch_mode='sync') elif args.model == 'ad': cfg = get_cfg() cfg.merge_from_file('./fpn_config/semantic_R_50_FPN_1x.yaml') model = AnchorDiffNet(cfg, embedding=args.embedding_size, batch_mode='sync') model.load_state_dict(torch.load('./log/snapshot/epoch-last.pth')['model']) model.eval() model.float() model.cuda() with torch.no_grad(): video_mean_iou_list = [] model.eval() videos = [i_id.strip() for i_id in open(osp.join(args.data_dir, 'ImageSets', '2016', 'val.txt'))] if args.video and args.video in videos: videos = [args.video] for vid, video in enumerate(videos, start=1): curr_video_iou_list = [] img_files = sorted(glob.glob(osp.join(args.data_dir, 'JPEGImages', '480p', video, '*.jpg'))) ann_files = sorted(glob.glob(osp.join(args.data_dir, 'Annotations', '480p', video, '*.png'))) #img_files = img_files[:10] #ann_files = ann_files[:10] if args.ms_mirror: w = 512 h = 256 resize_shape = [(w*0.5, h*0.5), (w, h), (w*1.5, h*1.5)] resize_shape = [(int((s[0])), int((s[1]))) for s in resize_shape] mirror = True else: #resize_shape = [(857, 481)] resize_shape = [(704, 480)] #resize_shape = [(720, 432)] mirror = False reference_img = [] for s in resize_shape: reference_img.append(normalize(np.asarray(cv2.resize(cv2.imread(img_files[0], cv2.IMREAD_COLOR)[:,:,::-1], s), np.float32), config.image_mean, config.image_std).transpose((2, 0, 1))) if mirror: for r in range(len(reference_img)): 
reference_img.append(reference_img[r][:, :, ::-1].copy()) reference_img = [torch.from_numpy(np.expand_dims(r, axis=0)).float().cuda() for r in reference_img] reference_mask = np.array(Image.open(ann_files[0])) > 0 reference_mask = torch.from_numpy(np.expand_dims(np.expand_dims(reference_mask.astype(np.float32), axis=0), axis=0)).cuda() H, W = reference_mask.size(2), reference_mask.size(3) if args.visualize: colors = np.random.randint(128, 255, size=(1, 3), dtype="uint8") colors = np.vstack([[0, 0, 0], colors]).astype("uint8") last_mask_num = 0 last_mask = None last_mask_final = None kernel1 = np.ones((15, 15), np.uint8) kernel2 = np.ones((101, 101), np.uint8) kernel3 = np.ones((31, 31), np.uint8) predictions_all = [] gt_all = [] for f, (img_file, ann_file) in enumerate(zip(img_files, ann_files)): current_img = [] for s in resize_shape: current_img.append(normalize(np.asarray(cv2.resize( cv2.imread(img_file, cv2.IMREAD_COLOR)[:,:,::-1], s), np.float32), config.image_mean, config.image_std).transpose((2, 0, 1))) if mirror: for c in range(len(current_img)): current_img.append(current_img[c][:, :, ::-1].copy()) current_img = [torch.from_numpy(np.expand_dims(c, axis=0)).float().cuda() for c in current_img] #current_mask = np.array(Image.open(ann_file)) > 0 current_mask = Image.open(ann_file) current_mask = np.atleast_3d(current_mask)[...,0] current_mask = current_mask.copy() current_mask[current_mask > 0] = 1 current_mask = torch.from_numpy(np.expand_dims(np.expand_dims(current_mask.astype(np.float32), axis=0), axis=0)).cuda() if args.model in ['base']: predictions = [model(cur) for ref, cur in zip(reference_img, current_img)] predictions = [F.interpolate(input=p[0], size=(H, W), mode='bilinear', align_corners=True) for p in predictions] elif args.model in ['intra']: predictions = [model(cur) for ref, cur in zip(reference_img, current_img)] predictions = [F.interpolate(input=p, size=(H, W), mode='bilinear', align_corners=True) for p in predictions] elif args.model in 
['inter', 'concat', 'ad']: predictions = [model(ref, cur) for ref, cur in zip(reference_img, current_img)] predictions = [F.interpolate(input=p, size=(H, W), mode='bilinear', align_corners=True) for p in predictions] if mirror: for r in range(len(predictions)//2, len(predictions)): predictions[r] = torch.flip(predictions[r], [3]) predictions = torch.mean(torch.stack(predictions, dim=0), 0) predictions_all.append(predictions.sigmoid().data.cpu().numpy()[0, 0].copy()) gt_all.append(current_mask.data.cpu().numpy()[0, 0].astype(np.uint8).copy()) if args.inst_prune: result_dir = os.path.join('inst_prune', video) if os.path.exists(os.path.join(result_dir, img_file.split('/')[-1].split('.')[0] + '.png')): detection_mask = np.array( Image.open(os.path.join(result_dir, img_file.split('/')[-1].split('.')[0] + '.png'))) > 0 detection_mask = torch.from_numpy( np.expand_dims(np.expand_dims(detection_mask.astype(np.float32), axis=0), axis=0)).cuda() predictions = predictions * detection_mask process_now = (predictions > args.threshold).data.cpu().numpy().astype(np.uint8)[0, 0] if 100000 > process_now.sum() > 40000: last_mask_numpy = (predictions > args.threshold).data.cpu().numpy().astype(np.uint8)[0, 0] last_mask_numpy = cv2.morphologyEx(last_mask_numpy, cv2.MORPH_OPEN, kernel1) dilation = cv2.dilate(last_mask_numpy, kernel3, iterations=1) contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) cnt_area = [cv2.contourArea(cnt) for cnt in contours] if len(contours) > 1: contour = contours[np.argmax(cnt_area)] polygon = contour.reshape(-1, 2) x, y, w, h = cv2.boundingRect(polygon) x0, y0 = x, y x1 = x + w y1 = y + h mask_rect = torch.from_numpy(np.zeros_like(dilation).astype(np.float32)).cuda() mask_rect[y0:y1, x0:x1] = 1 mask_rect = mask_rect.unsqueeze(0).unsqueeze(0) if np.max(cnt_area) > 30000: if last_mask_final is None or get_iou(last_mask_final, mask_rect, thresh=args.threshold) > 0.3: predictions = predictions * mask_rect last_mask_final = 
predictions.clone() if 100000 > last_mask_num > 5000: last_mask_numpy = (last_mask > args.threshold).data.cpu().numpy().astype(np.uint8)[0, 0] last_mask_numpy = cv2.morphologyEx(last_mask_numpy, cv2.MORPH_OPEN, kernel1) dilation = cv2.dilate(last_mask_numpy, kernel2, iterations=1) dilation = torch.from_numpy(dilation.astype(np.float32)).cuda() last_mask = predictions.clone() last_mask_num = (predictions > args.threshold).sum() predictions = predictions*dilation else: last_mask = predictions.clone() last_mask_num = (predictions > args.threshold).sum() iou_temp = get_iou(predictions, current_mask, thresh=args.threshold) if 0 < f < (len(ann_files)-1): curr_video_iou_list.append(iou_temp) if args.visualize: mask = colors[(predictions.cpu().numpy().squeeze() > args.threshold).astype(np.uint8)] output = ((0.4 * cv2.imread(img_file)) + (0.6 * mask)).astype("uint8") cv2.putText(output, "%.3f" % (iou_temp.item()), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA) cv2.imshow(video, output) cv2.waitKey(1) suffix = args.ms_mirror*'ms_mirror'+(not args.ms_mirror)*'single'+args.inst_prune*'_prune' visual_path = osp.join('visualization', args.model + '_' + suffix, img_file.split('/')[-2]) if not osp.exists(visual_path): os.makedirs(visual_path) cv2.imwrite(osp.join(visual_path, ann_file.split('/')[-1]), output) if args.save_mask: suffix = args.ms_mirror*'ms_mirror'+(not args.ms_mirror)*'single'+args.inst_prune*'_prune' if not osp.exists(osp.join(args.save_mask_dir, args.model, suffix, video)): os.makedirs(osp.join(args.save_mask_dir, args.model, suffix, video)) cv2.imwrite(osp.join(args.save_mask_dir, args.model, suffix, video, ann_file.split('/')[-1]), (predictions.squeeze() > args.threshold).cpu().numpy().astype(np.uint8)) cv2.destroyAllWindows() video_mean_iou_list.append(sum(curr_video_iou_list)/len(curr_video_iou_list)) print('{} {} {}'.format(vid, video, video_mean_iou_list[-1])) if args.eval_sal: if not osp.exists(args.save_heatmap_dir): 
os.makedirs(args.save_heatmap_dir) with open(args.save_heatmap_dir + video + '.pkl', 'wb') as f: pickle.dump({'pred': np.array(predictions_all), 'gt': np.array(gt_all)}, f, pickle.HIGHEST_PROTOCOL) mean_iou = sum(video_mean_iou_list)/len(video_mean_iou_list) print('mean_iou {}'.format(mean_iou)) end = timeit.default_timer() print(end-start, 'seconds') # ========================== if args.eval_sal: pkl_files = glob.glob(args.save_heatmap_dir + '*.pkl') heatmap_gt = [] heatmap_pred = [] for i, pkl_file in enumerate(pkl_files): with open(pkl_file, 'rb') as f: info = pickle.load(f) heatmap_gt.append(np.array(info['gt'][1:-1]).flatten()) heatmap_pred.append(np.array(info['pred'][1:-1]).flatten()) heatmap_gt = np.hstack(heatmap_gt).flatten() heatmap_pred = np.hstack(heatmap_pred).flatten() precision, recall, _ = precision_recall_curve(heatmap_gt, heatmap_pred) Fmax = 2 * (precision * recall) / (precision + recall) print('MAE', np.mean(abs(heatmap_pred - heatmap_gt))) print('F_max', Fmax.max()) n_sample = len(precision)//1000 import scipy.io scipy.io.savemat('davis.mat', {'recall': recall[0::n_sample], 'precision': precision[0::n_sample]})