def __getitem__(self, idx):
    """Load one full video for evaluation.

    Returns all frames of the video plus the one-hot first-frame
    annotation, at the original 480p resolution.

    Returns:
        (frames, masks, obj_n, info) where frames is (T, 3, H, W) float,
        masks is (1, obj_n, H, W) float (first frame only), obj_n is the
        object count including background, and info carries metadata.
    """
    name = self.dataset_list[idx]
    frame_paths = sorted(glob(os.path.join(
        self.root, 'JPEGImages', '480p', name, '*.jpg')))
    anno_paths = sorted(glob(os.path.join(
        self.root, 'Annotations', '480p', name, '*.png')))

    # Only the first annotation is needed: it seeds the segmentation.
    ref_mask = np.array(
        myutils.load_image_in_PIL(anno_paths[0], 'P'), np.uint8)
    if self.single_obj:
        # Collapse all object ids into a single foreground label.
        ref_mask[ref_mask > 1] = 1

    h, w = ref_mask.shape
    # Palette ids run 0..K, so the count including background is max+1.
    obj_n = ref_mask.max() + 1
    n_frames = len(frame_paths)

    masks = torch.zeros((1, obj_n, h, w), dtype=torch.float)
    onehot, _ = self.to_onehot(ref_mask)
    masks[0] = onehot[:obj_n]

    frames = torch.zeros((n_frames, 3, h, w), dtype=torch.float)
    for t, path in enumerate(frame_paths):
        frames[t] = self.to_tensor(myutils.load_image_in_PIL(path, 'RGB'))

    info = {
        'name': name,
        'num_frames': n_frames,
    }
    return frames, masks, obj_n, info
def __getitem__(self, idx):
    """Synthesize a training clip from a single annotated image.

    The same image/mask pair is augmented ``clip_n`` times to imitate a
    short video; frame 0 is kept un-augmented (apart from the shared
    resize-crop) and defines the object id list used for the rest.

    Returns:
        (frames, masks, obj_n, info) with frames of shape
        (clip_n, 3, output_size, output_size) and masks cropped to the
        first obj_n channels.
    """
    img_pil = myutils.load_image_in_PIL(self.img_list[idx], 'RGB')
    mask_pil = myutils.load_image_in_PIL(self.mask_list[idx], 'P')

    frames = torch.zeros((self.clip_n, 3, self.output_size, self.output_size), dtype=torch.float)
    masks = torch.zeros((self.clip_n, self.max_obj_n, self.output_size, self.output_size), dtype=torch.float)

    for i in range(self.clip_n):
        img, mask = img_pil, mask_pil
        # Frame 0 stays geometrically/photometrically unaltered so its
        # object list is the reference for the whole clip.
        if i > 0:
            img, mask = self.random_horizontal_flip(img, mask)
            img = self.color_jitter(img)
            img, mask = self.random_affine(img, mask)
        # Resize-crop applies to every frame (incl. i == 0) — the output
        # tensors are allocated at output_size.
        img, mask = self.random_resize_crop(img, mask)

        mask = np.array(mask, np.uint8)
        if i == 0:
            # First frame fixes the object-id ordering (obj_list);
            # +1 accounts for the background channel.
            mask, obj_list = self.to_onehot(mask)
            obj_n = len(obj_list) + 1
        else:
            # Later frames reuse the frame-0 ordering for consistency.
            mask, _ = self.to_onehot(mask, obj_list)

        frames[i] = self.to_tensor(img)
        masks[i] = mask

    info = {
        'name': self.img_list[idx]
    }
    return frames, masks[:, :obj_n], obj_n, info
def __getitem__(self, idx):
    """Sample a random training clip from a video.

    Picks ``clip_n`` random frames, augments frames after the first, and
    retries the random crop (up to 10 times) until the crop contains some
    background, i.e. is not fully covered by objects.

    Returns:
        (frames, masks, obj_n, info) with frames of shape
        (clip_n, 3, output_size, output_size).
    """
    video_name = self.dataset_list[idx]
    img_dir = os.path.join(self.root, 'JPEGImages', '480p', video_name)
    mask_dir = os.path.join(self.root, 'Annotations', '480p', video_name)

    img_list = sorted(glob(os.path.join(img_dir, '*.jpg')))
    mask_list = sorted(glob(os.path.join(mask_dir, '*.png')))

    # Random, order-shuffled subset of clip_n frame indices.
    idx_list = list(range(len(img_list)))
    random.shuffle(idx_list)
    idx_list = idx_list[:self.clip_n]

    frames = torch.zeros(
        (self.clip_n, 3, self.output_size, self.output_size), dtype=torch.float)
    masks = torch.zeros(
        (self.clip_n, self.max_obj_n, self.output_size, self.output_size), dtype=torch.float)

    for i, frame_idx in enumerate(idx_list):
        img = myutils.load_image_in_PIL(img_list[frame_idx], 'RGB')
        mask = myutils.load_image_in_PIL(mask_list[frame_idx], 'P')

        # Frame 0 is the reference: no photometric/affine augmentation,
        # so its object list anchors the clip.
        if i > 0:
            img = self.color_jitter(img)
            img, mask = self.random_affine(img, mask)

        # Re-crop until the crop keeps some background pixels
        # (channel 0 of the one-hot mask has a zero somewhere), giving
        # up after 10 attempts and keeping the last crop.
        roi_cnt = 0
        while roi_cnt < 10:
            img_roi, mask_roi = self.random_resize_crop(img, mask)
            mask_roi = np.array(mask_roi, np.uint8)

            if i == 0:
                # First frame defines obj_list; +1 for background.
                mask_roi, obj_list = self.to_onehot(mask_roi)
                obj_n = len(obj_list) + 1
            else:
                mask_roi, _ = self.to_onehot(mask_roi, obj_list)

            if torch.any(mask_roi[0] == 0).item():
                break
            roi_cnt += 1

        frames[i] = self.to_tensor(img_roi)
        masks[i] = mask_roi

    info = {'name': video_name, 'idx_list': idx_list}
    return frames, masks[:, :obj_n], obj_n, info
def cvt_mask_palette_VOC(data):
    """Re-save a VOC mask with the shared color palette.

    Args:
        data: a (src_path, dst_path) pair — packed as one tuple so the
            function can be mapped over a worker pool.
    """
    src_path, dst_path = data
    arr = np.array(myutils.load_image_in_PIL(src_path, 'P'))
    # VOC class ids are 0..20; anything above (e.g. the 255 "void"
    # label) is mapped to background.
    arr[arr > 20] = 0
    out = Image.fromarray(arr)
    out.putpalette(mask_palette)
    out.save(dst_path)
def __init__(self, img_dir, mask_dir):
    """Dataset over a single video directory.

    Caches the first frame and its one-hot mask as the reference, then
    serves the remaining frames for inference.

    Args:
        img_dir: directory containing the frame images (*.jpg / *.png).
        mask_dir: directory containing annotation masks (*.png); only
            the first mask is used.
    """
    self.img_list = sorted(
        glob(os.path.join(img_dir, '*.jpg')) + glob(os.path.join(img_dir, '*.png')))
    self.mask_list = sorted(glob(os.path.join(mask_dir, '*.png')))

    first_mask = myutils.load_image_in_PIL(self.mask_list[0], 'P')
    # Pass 'RGB' explicitly, consistent with every other loader in this
    # file (previously this relied on load_image_in_PIL's default mode).
    first_frame = myutils.load_image_in_PIL(self.img_list[0], 'RGB')
    self.first_name = os.path.basename(self.img_list[0])[:-4]

    first_mask = np.array(first_mask, np.uint8)
    # Palette ids run 0..K, so object count including background is max+1.
    self.obj_n = first_mask.max() + 1

    # Frame 0 is consumed here as the reference; __getitem__ iterates
    # only over the remaining frames.
    self.img_list = self.img_list[1:]
    self.video_len = len(self.img_list)

    self.to_tensor = TF.ToTensor()
    self.to_onehot = mytrans.ToOnehot(self.obj_n, shuffle=False)

    first_mask, _ = self.to_onehot(first_mask)
    self.first_frame = self.to_tensor(first_frame)
    self.first_mask = first_mask[:self.obj_n]
def __getitem__(self, idx):
    """Return (frame_tensor, basename-without-extension) for frame idx."""
    path = self.img_list[idx]
    frame = self.to_tensor(myutils.load_image_in_PIL(path, 'RGB'))
    # Strip the 4-char extension (".jpg" / ".png") to get the frame name.
    img_name = os.path.basename(path)[:-4]
    return frame, img_name
def __getitem__(self, idx):
    """Load a YouTube-VOS-style video for evaluation.

    Skips leading frames recorded before any object appears, tracks each
    object's first-appearance frame, resizes everything so the height is
    at most ``self.out_h`` (never upscaling), and builds a per-object
    boolean mask from each object's first annotated frame.

    Returns:
        (frames, masks, obj_n, info) where frames is (T, 3, out_h, out_w)
        float, masks is (obj_n, out_h, out_w) bool, and info carries
        visibility/start metadata for the evaluator.
    """
    video_name = self.dataset_list[idx]
    img_dir = os.path.join(self.root, 'JPEGImages', video_name)
    mask_dir = os.path.join(self.root, 'Annotations', video_name)

    img_list = sorted(glob(os.path.join(img_dir, '*.jpg')))
    basename_list = [os.path.basename(x)[:-4] for x in img_list]
    video_len = len(img_list)

    # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin bool is the supported dtype spelling.
    selected_idx = np.ones(video_len, bool)

    # Find the earliest frame in which any object appears.
    objs = self.meta_data['videos'][video_name]['objects']
    obj_n = 1
    video_obj_appear_st_idx = video_len
    for obj_idx, obj_gt in objs.items():
        obj_n = max(obj_n, int(obj_idx) + 1)
        video_obj_appear_idx = basename_list.index(obj_gt['frames'][0])
        video_obj_appear_st_idx = min(video_obj_appear_st_idx, video_obj_appear_idx)

    # Drop all frames before the first object appearance.
    selected_idx[:video_obj_appear_st_idx] = False
    selected_idx = selected_idx.tolist()
    img_list = list(compress(img_list, selected_idx))
    basename_list = list(compress(basename_list, selected_idx))
    video_len = len(img_list)

    # obj_vis[t, k] == 1 once object k has appeared by frame t;
    # background (channel 0) is always visible.
    obj_vis = np.zeros((video_len, obj_n), np.uint8)
    obj_vis[:, 0] = 1
    # FIX: first-appearance frame indices were stored as np.uint8, which
    # silently overflows for videos longer than 255 frames; int32 is safe.
    obj_st = np.zeros(obj_n, np.int32)

    # Output size: downscale to out_h keeping aspect ratio, never upscale.
    tmp_img = myutils.load_image_in_PIL(img_list[0], 'RGB')
    original_w, original_h = tmp_img.size
    if original_h < self.out_h:
        out_h, out_w = original_h, original_w
    else:
        out_h = self.out_h
        out_w = int(original_w / original_h * self.out_h)

    masks = torch.zeros((obj_n, out_h, out_w), dtype=torch.bool)
    basename_to_save = list()
    for obj_idx, obj_gt in objs.items():
        obj_idx = int(obj_idx)
        basename_to_save += obj_gt['frames']
        frame_idx = basename_list.index(obj_gt['frames'][0])
        obj_st[obj_idx] = frame_idx
        obj_vis[frame_idx:, obj_idx] = 1

        # One-hot the object's first annotated mask at the output size.
        # (PIL always uses NEAREST resampling for 'P'-mode images, so
        # label ids are preserved by the resize.)
        mask_path = os.path.join(mask_dir, obj_gt['frames'][0] + '.png')
        mask_raw = myutils.load_image_in_PIL(mask_path, 'P')
        mask_raw = mask_raw.resize((out_w, out_h))
        mask_raw = torch.from_numpy(np.array(mask_raw, np.uint8))
        masks[obj_idx, mask_raw == obj_idx] = 1

    basename_to_save = sorted(list(set(basename_to_save)))

    frames = torch.zeros((video_len, 3, out_h, out_w), dtype=torch.float)
    for i in range(video_len):
        img = myutils.load_image_in_PIL(img_list[i], 'RGB')
        img = img.resize((out_w, out_h))
        frames[i] = self.to_tensor(img)

    info = {
        'name': video_name,
        'num_frames': video_len,
        'obj_vis': obj_vis,
        'obj_st': obj_st,
        'basename_list': basename_list,
        'basename_to_save': basename_to_save,
        'original_size': (original_h, original_w)
    }
    return frames, masks, obj_n, info