Example #1
0
    def __getitem__(self, idx):
        """Load every frame of one video plus its first-frame one-hot mask.

        Args:
            idx: Index into ``self.dataset_list`` selecting the video.

        Returns:
            A tuple ``(frames, masks, obj_n, info)`` where ``frames`` is a
            float tensor of shape ``(video_len, 3, h, w)``, ``masks`` is a
            float tensor of shape ``(1, obj_n, h, w)`` built from the first
            annotation file, ``obj_n`` is the largest label in that
            annotation plus one (as a Python ``int``), and ``info`` holds
            the video name and the number of frames.
        """
        video_name = self.dataset_list[idx]

        img_dir = os.path.join(self.root, 'JPEGImages', '480p', video_name)
        mask_dir = os.path.join(self.root, 'Annotations', '480p', video_name)

        img_list = sorted(glob(os.path.join(img_dir, '*.jpg')))
        mask_list = sorted(glob(os.path.join(mask_dir, '*.png')))

        first_mask = myutils.load_image_in_PIL(mask_list[0], 'P')
        first_mask_np = np.array(first_mask, np.uint8)

        if self.single_obj:
            # Collapse every label above 1 into label 1.
            first_mask_np[first_mask_np > 1] = 1

        h, w = first_mask_np.shape
        # ``max()`` on a uint8 array is a NumPy uint8 scalar; convert it to a
        # Python int before adding one so that label 255 cannot wrap around
        # to 0 and callers receive a plain int.
        obj_n = int(first_mask_np.max()) + 1
        video_len = len(img_list)

        frames = torch.zeros((video_len, 3, h, w), dtype=torch.float)
        masks = torch.zeros((1, obj_n, h, w), dtype=torch.float)

        mask, _ = self.to_onehot(first_mask_np)
        # Keep only the first ``obj_n`` channels of the one-hot result.
        masks[0] = mask[:obj_n]

        for i, img_path in enumerate(img_list):
            img = myutils.load_image_in_PIL(img_path, 'RGB')
            frames[i] = self.to_tensor(img)

        info = {
            'name': video_name,
            'num_frames': video_len,
        }

        return frames, masks, obj_n, info
Example #2
0
    def __getitem__(self, idx):
        """Build a ``clip_n``-frame clip from a single image/mask pair.

        Every clip position starts from the same source image and mask.
        Position 0 only goes through ``self.random_resize_crop``; later
        positions are first passed through ``self.random_horizontal_flip``,
        ``self.color_jitter`` and ``self.random_affine``. The object list
        produced by ``self.to_onehot`` at position 0 is reused for all
        later positions.

        Args:
            idx: Index into ``self.img_list`` / ``self.mask_list``.

        Returns:
            A tuple ``(frames, masks, obj_n, info)``. ``frames`` has shape
            ``(clip_n, 3, output_size, output_size)``, ``masks`` is the
            one-hot stack sliced to its first ``obj_n`` channels, ``obj_n``
            is the length of the position-0 object list plus one, and
            ``info`` holds the source image path under ``'name'``.
        """
        source_img = myutils.load_image_in_PIL(self.img_list[idx], 'RGB')
        source_mask = myutils.load_image_in_PIL(self.mask_list[idx], 'P')

        frame_shape = (self.clip_n, 3, self.output_size, self.output_size)
        mask_shape = (self.clip_n, self.max_obj_n,
                      self.output_size, self.output_size)
        frames = torch.zeros(frame_shape, dtype=torch.float)
        masks = torch.zeros(mask_shape, dtype=torch.float)

        for t in range(self.clip_n):
            cur_img, cur_mask = source_img, source_mask
            is_first = (t == 0)

            if not is_first:
                # Extra augmentation for every position after the first.
                cur_img, cur_mask = self.random_horizontal_flip(cur_img, cur_mask)
                cur_img = self.color_jitter(cur_img)
                cur_img, cur_mask = self.random_affine(cur_img, cur_mask)

            cur_img, cur_mask = self.random_resize_crop(cur_img, cur_mask)
            label_map = np.array(cur_mask, np.uint8)

            if is_first:
                # Position 0 decides which objects the whole clip tracks.
                onehot, obj_list = self.to_onehot(label_map)
                obj_n = len(obj_list) + 1
            else:
                onehot, _ = self.to_onehot(label_map, obj_list)

            frames[t] = self.to_tensor(cur_img)
            masks[t] = onehot

        info = {'name': self.img_list[idx]}
        return frames, masks[:, :obj_n], obj_n, info
Example #3
0
    def __getitem__(self, idx):
        """Sample up to ``clip_n`` randomly ordered frames from one video.

        Frame indices are shuffled and the first ``clip_n`` are kept. Every
        sampled frame after the first goes through ``self.color_jitter`` and
        ``self.random_affine``. Each frame is then cropped with
        ``self.random_resize_crop`` up to ten times; cropping stops early as
        soon as channel 0 of the one-hot mask contains a zero (presumably
        meaning the crop shows at least one object - confirm against
        ``to_onehot``). If no attempt qualifies, the last crop is used.

        Args:
            idx: Index into ``self.dataset_list`` selecting the video.

        Returns:
            A tuple ``(frames, masks, obj_n, info)``. ``frames`` has shape
            ``(clip_n, 3, output_size, output_size)``, ``masks`` is the
            one-hot stack sliced to its first ``obj_n`` channels, ``obj_n``
            is the length of the first frame's object list plus one, and
            ``info`` holds the video name and the sampled frame indices.
        """
        video_name = self.dataset_list[idx]
        img_dir = os.path.join(self.root, 'JPEGImages', '480p', video_name)
        mask_dir = os.path.join(self.root, 'Annotations', '480p', video_name)

        img_list = sorted(glob(os.path.join(img_dir, '*.jpg')))
        mask_list = sorted(glob(os.path.join(mask_dir, '*.png')))

        shuffled = list(range(len(img_list)))
        random.shuffle(shuffled)
        idx_list = shuffled[:self.clip_n]

        frame_shape = (self.clip_n, 3, self.output_size, self.output_size)
        mask_shape = (self.clip_n, self.max_obj_n,
                      self.output_size, self.output_size)
        frames = torch.zeros(frame_shape, dtype=torch.float)
        masks = torch.zeros(mask_shape, dtype=torch.float)

        max_attempts = 10

        for pos, frame_idx in enumerate(idx_list):
            img = myutils.load_image_in_PIL(img_list[frame_idx], 'RGB')
            mask = myutils.load_image_in_PIL(mask_list[frame_idx], 'P')
            is_first = (pos == 0)

            if not is_first:
                img = self.color_jitter(img)
                img, mask = self.random_affine(img, mask)

            for _ in range(max_attempts):
                img_roi, mask_roi = self.random_resize_crop(img, mask)
                mask_roi = np.array(mask_roi, np.uint8)

                if is_first:
                    # The first sampled frame fixes the clip's object list.
                    mask_roi, obj_list = self.to_onehot(mask_roi)
                    obj_n = len(obj_list) + 1
                else:
                    mask_roi, _ = self.to_onehot(mask_roi, obj_list)

                # Accept the crop once channel 0 is not all non-zero.
                crop_accepted = torch.any(mask_roi[0] == 0).item()
                if crop_accepted:
                    break

            frames[pos] = self.to_tensor(img_roi)
            masks[pos] = mask_roi

        info = {'name': video_name, 'idx_list': idx_list}

        return frames, masks[:, :obj_n], obj_n, info
Example #4
0
def cvt_mask_palette_VOC(data):
    """Re-save a label mask with the module-level ``mask_palette`` attached.

    Labels greater than 20 are reset to 0 before the palette is applied.

    Args:
        data: A two-item sequence ``(src_path, dst_path)``: the mask file to
            read and the location to write the converted mask to.
    """
    src_path, dst_path = data

    labels = np.array(myutils.load_image_in_PIL(src_path, 'P'))
    out_of_range = labels > 20
    labels[out_of_range] = 0

    paletted = Image.fromarray(labels)
    paletted.putpalette(mask_palette)
    paletted.save(dst_path)
Example #5
0
    def __init__(self, img_dir, mask_dir):
        """Index a single image sequence and cache its first frame and mask.

        The first image (sorted order) and the first mask are loaded and
        stored as ``self.first_frame`` / ``self.first_mask``; the first
        image is then removed from ``self.img_list``, so the remaining
        entries are the frames after it.

        Args:
            img_dir: Directory searched for ``*.jpg`` and ``*.png`` frames.
            mask_dir: Directory searched for ``*.png`` masks; only the first
                one (sorted order) is read here.
        """
        self.img_list = sorted(
            glob(os.path.join(img_dir, '*.jpg')) +
            glob(os.path.join(img_dir, '*.png')))
        self.mask_list = sorted(glob(os.path.join(mask_dir, '*.png')))

        first_mask = myutils.load_image_in_PIL(self.mask_list[0], 'P')
        first_frame = myutils.load_image_in_PIL(self.img_list[0])
        # File name with its last four characters (".jpg"/".png") dropped.
        self.first_name = os.path.basename(self.img_list[0])[:-4]

        first_mask = np.array(first_mask, np.uint8)
        # ``max()`` on a uint8 array is a NumPy uint8 scalar; convert it to a
        # Python int before adding one so that label 255 cannot wrap around
        # to 0 and ``obj_n`` is a plain int for downstream size arguments.
        self.obj_n = int(first_mask.max()) + 1
        self.img_list = self.img_list[1:]
        self.video_len = len(self.img_list)

        self.to_tensor = TF.ToTensor()
        self.to_onehot = mytrans.ToOnehot(self.obj_n, shuffle=False)

        first_mask, _ = self.to_onehot(first_mask)
        self.first_frame = self.to_tensor(first_frame)
        # Keep only the first ``obj_n`` channels of the one-hot result.
        self.first_mask = first_mask[:self.obj_n]
Example #6
0
    def __getitem__(self, idx):
        """Return frame ``idx`` as a tensor together with its short name.

        Args:
            idx: Index into ``self.img_list``.

        Returns:
            A tuple ``(frame, img_name)``: the output of ``self.to_tensor``
            for the RGB image, and the image's base file name with its last
            four characters removed.
        """
        img_path = self.img_list[idx]
        frame = self.to_tensor(myutils.load_image_in_PIL(img_path, 'RGB'))
        img_name = os.path.basename(img_path)[:-4]
        return frame, img_name
Example #7
0
    def __getitem__(self, idx):
        """Load one video, starting at the first frame where any object appears.

        Frames that precede the earliest first-appearance frame listed in
        ``self.meta_data`` are dropped. Frames are resized to height
        ``self.out_h`` (keeping the aspect ratio) unless the original is
        already shorter, in which case the original size is kept.

        Args:
            idx: Index into ``self.dataset_list`` selecting the video.

        Returns:
            A tuple ``(frames, masks, obj_n, info)``. ``frames`` is a float
            tensor of shape ``(video_len, 3, out_h, out_w)``. ``masks`` is a
            bool tensor of shape ``(obj_n, out_h, out_w)`` whose channel for
            each object is filled from the annotation of that object's first
            listed frame. ``obj_n`` is the largest object id plus one (at
            least 1). ``info`` holds the video name, frame count, per-frame
            object visibility (``obj_vis``), per-object start index
            (``obj_st``), the kept base names, the sorted set of base names
            listed for any object (``basename_to_save``) and the original
            ``(height, width)``.
        """
        video_name = self.dataset_list[idx]

        img_dir = os.path.join(self.root, 'JPEGImages', video_name)
        mask_dir = os.path.join(self.root, 'Annotations', video_name)

        img_list = sorted(glob(os.path.join(img_dir, '*.jpg')))
        basename_list = [os.path.basename(x)[:-4] for x in img_list]
        video_len = len(img_list)

        objs = self.meta_data['videos'][video_name]['objects']
        obj_n = 1
        video_obj_appear_st_idx = video_len

        # Find the highest object id and the earliest frame any object shows up.
        for obj_idx, obj_gt in objs.items():
            obj_n = max(obj_n, int(obj_idx) + 1)
            video_obj_appear_idx = basename_list.index(obj_gt['frames'][0])
            video_obj_appear_st_idx = min(video_obj_appear_st_idx,
                                          video_obj_appear_idx)

        # Drop the leading frames that precede the first object appearance.
        # A plain slice replaces the former boolean-mask + ``compress`` step,
        # which relied on the ``np.bool`` alias removed in NumPy 1.24.
        img_list = img_list[video_obj_appear_st_idx:]
        basename_list = basename_list[video_obj_appear_st_idx:]

        video_len = len(img_list)
        obj_vis = np.zeros((video_len, obj_n), np.uint8)
        obj_vis[:, 0] = 1  # column 0 is marked visible in every frame
        # NOTE(review): uint8 caps the stored start index at 255 - confirm
        # that no video keeps more frames than that.
        obj_st = np.zeros(obj_n, np.uint8)

        tmp_img = myutils.load_image_in_PIL(img_list[0], 'RGB')
        original_w, original_h = tmp_img.size
        if original_h < self.out_h:
            # Already shorter than the target height: keep the original size.
            out_h, out_w = original_h, original_w
        else:
            out_h = self.out_h
            out_w = int(original_w / original_h * self.out_h)
        masks = torch.zeros((obj_n, out_h, out_w), dtype=torch.bool)

        basename_to_save = []
        for obj_idx, obj_gt in objs.items():
            obj_idx = int(obj_idx)
            basename_to_save.extend(obj_gt['frames'])

            # Index (within the trimmed list) where this object first appears.
            frame_idx = basename_list.index(obj_gt['frames'][0])
            obj_st[obj_idx] = frame_idx
            obj_vis[frame_idx:, obj_idx] = 1

            mask_path = os.path.join(mask_dir, obj_gt['frames'][0] + '.png')
            mask_raw = myutils.load_image_in_PIL(mask_path, 'P')
            mask_raw = mask_raw.resize((out_w, out_h))
            mask_raw = torch.from_numpy(np.array(mask_raw, np.uint8))

            # Mark the pixels whose label equals this object's id.
            masks[obj_idx, mask_raw == obj_idx] = 1

        basename_to_save = sorted(set(basename_to_save))

        frames = torch.zeros((video_len, 3, out_h, out_w), dtype=torch.float)
        for i, img_path in enumerate(img_list):
            img = myutils.load_image_in_PIL(img_path, 'RGB')
            img = img.resize((out_w, out_h))
            frames[i] = self.to_tensor(img)

        info = {
            'name': video_name,
            'num_frames': video_len,
            'obj_vis': obj_vis,
            'obj_st': obj_st,
            'basename_list': basename_list,
            'basename_to_save': basename_to_save,
            'original_size': (original_h, original_w)
        }

        return frames, masks, obj_n, info