Example #1
    def __getitem__(self, idx):
        video_info = self.data_source[idx]
        storage_obj = self.backend.open(video_info)

        frame_inds = self.frame_sampler.sample(len(storage_obj))
        num_segs, clip_len = frame_inds.shape

        img_list = storage_obj.get_frame(frame_inds.reshape(-1))
        img_tensor_list = []
        for i in range(num_segs):
            # transform each segment's clip of frames into a single tensor
            img_tensor = self.img_transform.apply_image(
                img_list[i * clip_len:(i + 1) * clip_len])
            img_tensor_list.append(img_tensor)

        img_tensor = torch.cat(img_tensor_list, dim=0)
        # img_tensor: (M, C, H, W), where M = num_segs * clip_len
        img_tensor = img_tensor.view((num_segs, clip_len) + img_tensor.shape[1:])
        img_tensor = img_tensor.permute(0, 2, 1, 3, 4).contiguous()  # [num_segs, C, clip_len, H, W]

        data = dict(
            imgs=DataContainer(img_tensor, stack=True, cpu_only=False)
        )
        if not self.test_mode:
            # annotation labels are 1-based; shift to 0-based for training
            gt_label = torch.LongTensor([video_info['label']]) - 1
            data['gt_labels'] = DataContainer(gt_label, stack=True, pad_dims=None, cpu_only=False)

        return data
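Items returned by __getitem__ wrap every tensor in a DataContainer, so the DataLoader has to batch them with mmcv's collate rather than the default collate_fn. A minimal usage sketch, assuming dataset is an instance of a dataset like the one above:

from functools import partial

from mmcv.parallel import collate
from torch.utils.data import DataLoader

# dataset: an instance of a dataset like the one above (assumed name)
loader = DataLoader(dataset,
                    batch_size=8,
                    collate_fn=partial(collate, samples_per_gpu=8))
# each batch['imgs'] is then a DataContainer whose .data holds a list with
# one stacked image tensor per GPU group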
Example #2
def test_format_trimap():
    ori_trimap = np.random.randint(3, size=(64, 64))
    ori_trimap[ori_trimap == 1] = 128
    ori_trimap[ori_trimap == 2] = 255

    from mmcv.parallel import DataContainer
    ori_result = dict(trimap=torch.from_numpy(ori_trimap.copy()),
                      meta=DataContainer({}))
    format_trimap = FormatTrimap(to_onehot=False)
    results = format_trimap(ori_result)
    result_trimap = results['trimap']
    assert result_trimap.shape == (1, 64, 64)
    assert ((result_trimap.numpy() == 0) == (ori_trimap == 0)).all()
    assert ((result_trimap.numpy() == 1) == (ori_trimap == 128)).all()
    assert ((result_trimap.numpy() == 2) == (ori_trimap == 255)).all()

    ori_result = dict(trimap=torch.from_numpy(ori_trimap.copy()),
                      meta=DataContainer({}))
    format_trimap = FormatTrimap(to_onehot=True)
    results = format_trimap(ori_result)
    result_trimap = results['trimap']
    assert result_trimap.shape == (3, 64, 64)
    assert ((result_trimap[0, ...].numpy() == 1) == (ori_trimap == 0)).all()
    assert ((result_trimap[1, ...].numpy() == 1) == (ori_trimap == 128)).all()
    assert ((result_trimap[2, ...].numpy() == 1) == (ori_trimap == 255)).all()

    assert repr(format_trimap) == format_trimap.__class__.__name__ + (
        '(to_onehot=True)')
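For reference, a minimal sketch of a FormatTrimap-like transform that would satisfy the asserts above; this is an assumed re-implementation, not the class under test, with the 0/128/255 value convention and the to_onehot flag taken from the test:

import torch
import torch.nn.functional as F


class SimpleFormatTrimap:
    """Map trimap values 0/128/255 to class indices 0/1/2, optionally one-hot."""

    def __init__(self, to_onehot=False):
        self.to_onehot = to_onehot

    def __call__(self, results):
        trimap = results['trimap']  # (H, W) tensor with values in {0, 128, 255}
        classes = torch.zeros_like(trimap)
        classes[trimap == 128] = 1
        classes[trimap == 255] = 2
        if self.to_onehot:
            # (H, W) -> (3, H, W) one-hot map
            results['trimap'] = F.one_hot(classes.long(), num_classes=3).permute(2, 0, 1)
        else:
            # (H, W) -> (1, H, W) class-index map
            results['trimap'] = classes.unsqueeze(0)
        return results

    def __repr__(self):
        return self.__class__.__name__ + f'(to_onehot={self.to_onehot})'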
Example #3
    def __getitem__(self, idx):
        video_info = self.data_source[idx]
        storage_obj = self.backend.open(video_info)
        frame_inds = self.frame_sampler.sample(len(storage_obj))
        num_segs, clip_len = frame_inds.shape
        assert num_segs == 1, f'support num_segs==1 only, got {num_segs}'

        img_list = storage_obj.get_frame(frame_inds.reshape(-1))
        img_tensor, trans_params = \
            self.img_transform.apply_image(img_list,
                                           return_transform_param=True)
        img_tensor = img_tensor.view((num_segs, clip_len) +
                                     img_tensor.shape[1:])
        img_tensor = img_tensor.permute(0, 2, 1, 3, 4).contiguous()

        data = dict(imgs=DataContainer(img_tensor,
                                       stack=True,
                                       pad_dims=2,
                                       cpu_only=False), )

        if not self.test_mode:
            # the flag of the sampled rotation transform is the self-supervised label
            gt_label = \
                torch.LongTensor([trans_params[self.rot_trans_index]['flag']])
            data['gt_labels'] = DataContainer(gt_label,
                                              stack=True,
                                              pad_dims=None,
                                              cpu_only=False)

        return data
Example #4
    def __getitem__(self, idx):
        video_info = self.data_source[idx]
        # build video storage backend object
        storage_obj = self.backend.open(video_info)
        total_num_frames = len(storage_obj)
        assert total_num_frames > 0, "Bad data {}".format(video_info)
        frame_inds = self.clip_sampler.sample(total_num_frames)
        num_segs, clip_len = frame_inds.shape
        assert num_segs == 1
        frame_list = storage_obj.get_frame(frame_inds.reshape(-1))
        clip_tensor, trans_params = self.clip_transform.apply_image(
            frame_list, return_transform_param=True)
        clip_tensor = clip_tensor.permute(1, 0, 2, 3).contiguous()  # (T, C, H, W) -> (C, T, H, W)

        gt_trajs = torch.FloatTensor(trans_params[self.mask_trans_index]['traj_rois'])

        im_q, sampled_position = self.sample_single_img_tensor(storage_obj)
        im_k, _ = self.sample_single_img_tensor(storage_obj, position=sampled_position)

        data = dict(
            img_q=DataContainer(im_q, stack=True, pad_dims=1, cpu_only=False),
            img_k=DataContainer(im_k, stack=True, pad_dims=1, cpu_only=False),
            imgs=DataContainer(clip_tensor, stack=True, pad_dims=1, cpu_only=False),
            gt_trajs=DataContainer(gt_trajs, stack=True, pad_dims=1, cpu_only=False),
        )
        storage_obj.close()

        return data
Example #5
    def __getitem__(self, idx):
        video_info = self.data_source[idx]
        # build video storage backend object
        storage_obj = self.backend.open(video_info)
        assert len(storage_obj) == len(video_info['gt_boxes'])
        frame_inds = self.frame_sampler.sample(len(storage_obj))
        assert frame_inds.shape[0] == 1, "Support single clip only."
        frame_inds = frame_inds.reshape(-1).astype(np.int64)
        img_list = storage_obj.get_frame(frame_inds)

        gt_info = np.array(video_info['gt_boxes'], dtype=np.int64)
        gt_labels = gt_info[frame_inds, -1]
        gt_boxes = gt_info[frame_inds, 0:4].astype(np.float32)

        img_list, gt_boxes = self.crop_wrt_gt_boxes(img_list, gt_boxes)
        # zero the loss weights for bounding boxes that fall out of scope
        gt_ctrs = (gt_boxes[..., 0:2] + gt_boxes[..., 2:4]) * 0.5
        is_in_scope = (gt_ctrs >= 0) & (gt_ctrs <= self.x_size)
        is_in_scope = np.all(is_in_scope, axis=-1)
        gt_weights = np.ones((len(gt_labels), ), np.float32)
        gt_weights[gt_labels > 0] = 0.
        gt_weights[~is_in_scope] = 0.

        img_tensor, trans_params = self.img_transform.apply_image(
            img_list, return_transform_param=True)
        gt_boxes = self.img_transform.apply_boxes(gt_boxes, trans_params)
        gt_trajs = torch.FloatTensor(gt_boxes).unsqueeze(0)  # [1, 16, 4]
        gt_weights = torch.FloatTensor(gt_weights).unsqueeze(0)  # [1, 16]
        img_tensor = img_tensor.permute(1, 0, 2, 3).contiguous()

        data = dict(
            imgs=DataContainer(img_tensor,
                               stack=True,
                               pad_dims=1,
                               cpu_only=False),
            gt_trajs=DataContainer(gt_trajs,
                                   stack=True,
                                   pad_dims=1,
                                   cpu_only=False),
            gt_weights=DataContainer(gt_weights,
                                     stack=True,
                                     pad_dims=1,
                                     cpu_only=False),
        )
        storage_obj.close()

        return data
Example #6
    def __getitem__(self, idx):
        video_info = self.data_source[idx]
        # build video storage backend object
        storage_obj = self.backend.open(video_info)
        frame_inds = self.frame_sampler.sample(len(storage_obj))
        num_segs, clip_len = frame_inds.shape
        assert num_segs == 1
        img_list = storage_obj.get_frame(frame_inds.reshape(-1))
        img_tensor, trans_params = self.img_transform.apply_image(
            img_list, return_transform_param=True)
        gt_trajs = torch.FloatTensor(trans_params[self.mask_trans_index]['traj_rois'])

        img_tensor = img_tensor.permute(1, 0, 2, 3).contiguous()
        data = dict(
            imgs=DataContainer(img_tensor, stack=True, pad_dims=1, cpu_only=False),
            gt_trajs=DataContainer(gt_trajs, stack=True, pad_dims=1, cpu_only=False),
        )
        storage_obj.close()

        return data
Example #7
    def __getitem__(self, idx):
        video_info = self.data_source[idx]
        # build video storage backend object
        storage_obj = self.backend.open(video_info)

        imgs, stride_idx = self.sample_single_img_tensor(storage_obj)

        imgs = imgs.unsqueeze(0)
        gt_label = torch.LongTensor([stride_idx])
        data = dict(imgs=DataContainer(imgs,
                                       stack=True,
                                       pad_dims=1,
                                       cpu_only=False),
                    gt_labels=DataContainer(gt_label,
                                            stack=True,
                                            pad_dims=None,
                                            cpu_only=False))
        storage_obj.close()

        return data
Example #8
    def __getitem__(self, idx):
        video_info = self.data_source[idx]
        # build video storage backend object
        storage_obj = self.backend.open(
            video_info)  # type: storage_backends.BaseStorageBackend
        num_frames = len(storage_obj)

        frame_inds = self._sample_indices(num_frames)

        # extract video frames from backend storage
        # get frame according to the frame indexes
        img_list = storage_obj.get_frame(frame_inds.reshape(-1))
        img_list = [img.astype(np.float32) for img in img_list]
        img_tensor = self.img_transform.apply_image(
            img_list)  # type: torch.Tensor
        img_tensor = img_tensor.view((self.tuple_len, self.clip_len) +
                                     img_tensor.shape[1:])
        img_tensor = img_tensor.permute(
            (0, 2, 1, 3, 4))  # to [N_tup, 3, T, H, W]

        # randomly shuffle the order of the sampled clips; the permutation
        # index serves as the classification target
        shuffle_index = random.randint(0, self.permutations.size(0) - 1)
        shuffle_order = self.permutations[shuffle_index]
        img_tensor = img_tensor[shuffle_order].contiguous()

        gt_label = torch.LongTensor([shuffle_index])
        data = dict(imgs=DataContainer(img_tensor,
                                       stack=True,
                                       pad_dims=2,
                                       cpu_only=False),
                    gt_labels=DataContainer(gt_label,
                                            stack=True,
                                            pad_dims=None,
                                            cpu_only=False))

        return data
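A hedged sketch of how a permutation table like self.permutations could be built; tuple_len below is an assumed stand-in for self.tuple_len, and the shuffle index sampled above then doubles as the classification label:

import itertools

import torch

tuple_len = 3  # assumed; mirrors self.tuple_len in the dataset above
# one row per possible ordering of the clips; row i is the ordering for label i
permutations = torch.LongTensor(list(itertools.permutations(range(tuple_len))))
# permutations.size(0) == 6 for tuple_len == 3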
Example #9
    def __getitem__(self, idx):
        video_info = self.data_source[idx]
        # build video storage backend object
        storage_obj = self.backend.open(video_info)
        total_num_frames = len(storage_obj)
        assert total_num_frames > 0, "Bad data {}".format(video_info)

        clip_tensor_with_transParam, clip_q, clip_k_p, clip_k_n, dataIndex = self.sample_query_key(
            storage_obj)
        clip_tensor, clip_tensor_trans_param = clip_tensor_with_transParam
        gt_trajs = torch.FloatTensor(
            clip_tensor_trans_param[self.mask_trans_index]['traj_rois'])

        data = dict(
            clip_q=DataContainer(clip_q,
                                 stack=True,
                                 pad_dims=1,
                                 cpu_only=False),
            clip_k_p=DataContainer(clip_k_p,
                                   stack=True,
                                   pad_dims=1,
                                   cpu_only=False),
            clip_k_n=DataContainer(clip_k_n,
                                   stack=True,
                                   pad_dims=1,
                                   cpu_only=False),
            dataIndex=DataContainer(dataIndex,
                                    stack=True,
                                    pad_dims=None,
                                    cpu_only=False),
            imgs=DataContainer(clip_tensor,
                               stack=True,
                               pad_dims=1,
                               cpu_only=False),
            gt_trajs=DataContainer(gt_trajs,
                                   stack=True,
                                   pad_dims=1,
                                   cpu_only=False),
        )
        storage_obj.close()

        return data
Example #10
def collate(batch, samples_per_gpu=1, pad_size=None):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.

    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., image tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes

    If pad_size is given, stacked image tensors are padded to that fixed
    (h, w) instead of the per-group maximum.
    """

    if not isinstance(batch, collections.abc.Sequence):
        raise TypeError("{} is not supported.".format(type(batch)))

    if isinstance(batch[0], DataContainer):
        assert len(batch) % samples_per_gpu == 0
        stacked = []
        if batch[0].cpu_only:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(stacked,
                                 batch[0].stack,
                                 batch[0].padding_value,
                                 cpu_only=True)
        elif batch[0].stack:
            for i in range(0, len(batch), samples_per_gpu):
                assert isinstance(batch[i].data, torch.Tensor)
                # TODO: handle tensors other than 3d
                assert batch[i].dim() == 3
                c, h, w = batch[i].size()
                for sample in batch[i:i + samples_per_gpu]:
                    assert c == sample.size(0)
                    h = max(h, sample.size(1))
                    w = max(w, sample.size(2))
                if pad_size is not None:
                    # pad every group to the fixed (h, w) given by pad_size
                    h, w = pad_size[0], pad_size[1]
                padded_samples = [
                    F.pad(sample.data,
                          (0, w - sample.size(2), 0, h - sample.size(1)),
                          value=sample.padding_value)
                    for sample in batch[i:i + samples_per_gpu]
                ]
                stacked.append(default_collate(padded_samples))
        else:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], collections.abc.Sequence):
        transposed = zip(*batch)
        return [
            collate(samples, samples_per_gpu, pad_size=pad_size)
            for samples in transposed
        ]
    elif isinstance(batch[0], collections.abc.Mapping):
        return {
            key: collate([d[key] for d in batch],
                         samples_per_gpu,
                         pad_size=pad_size)
            for key in batch[0]
        }
    else:
        return default_collate(batch)
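A hedged worked example of the fixed pad_size path above, with made-up shapes and assuming this collate and mmcv's DataContainer are in scope; a second variant that instead pads a configurable number of trailing dims via pad_dims follows below:

import torch
from mmcv.parallel import DataContainer

imgs = [DataContainer(torch.ones(3, 2, 2), stack=True),
        DataContainer(torch.ones(3, 4, 3), stack=True)]
batch = collate(imgs, samples_per_gpu=2, pad_size=(8, 8))
# batch.data[0] has shape (2, 3, 8, 8); shorter samples are filled with each
# sample's padding_value before default_collate stacks the group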
def collate(batch, samples_per_gpu=1):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.
    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.
    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., images tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes
    """

    if not isinstance(batch, collections.abc.Sequence):
        raise TypeError("{} is not supported.".format(type(batch)))

    if isinstance(batch[0], DataContainer):
        assert len(batch) % samples_per_gpu == 0
        stacked = []
        if batch[0].cpu_only:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(
                stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
        elif batch[0].stack:
            for i in range(0, len(batch), samples_per_gpu):
                assert isinstance(batch[i].data, torch.Tensor)

                if batch[i].pad_dims is not None:
                    ndim = batch[i].dim()
                    assert ndim > batch[i].pad_dims
                    max_shape = [0 for _ in range(batch[i].pad_dims)]
                    for dim in range(1, batch[i].pad_dims + 1):
                        max_shape[dim - 1] = batch[i].size(-dim)
                    for sample in batch[i:i + samples_per_gpu]:
                        for dim in range(0, ndim - batch[i].pad_dims):
                            assert batch[i].size(dim) == sample.size(dim)
                        for dim in range(1, batch[i].pad_dims + 1):
                            max_shape[dim - 1] = max(max_shape[dim - 1],
                                                     sample.size(-dim))
                    padded_samples = []
                    for sample in batch[i:i + samples_per_gpu]:
                        pad = [0 for _ in range(batch[i].pad_dims * 2)]
                        for dim in range(1, batch[i].pad_dims + 1):
                            pad[2 * dim -
                                1] = max_shape[dim - 1] - sample.size(-dim)
                        padded_samples.append(
                            F.pad(
                                sample.data, pad, value=sample.padding_value))
                    stacked.append(default_collate(padded_samples))
                elif batch[i].pad_dims is None:
                    stacked.append(
                        default_collate([
                            sample.data
                            for sample in batch[i:i + samples_per_gpu]
                        ]))
                else:
                    raise ValueError(
                        'pad_dims should be either None or integers (1-3)')

        else:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], collections.abc.Sequence):
        transposed = zip(*batch)
        return [collate(samples, samples_per_gpu) for samples in transposed]
    elif isinstance(batch[0], collections.abc.Mapping):
        return {
            key: collate([d[key] for d in batch], samples_per_gpu)
            for key in batch[0]
        }
    else:
        return default_collate(batch)
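A hedged worked example of the pad_dims branch above, again with made-up shapes and assuming this collate and mmcv's DataContainer are in scope:

import torch
from mmcv.parallel import DataContainer

gt = [DataContainer(torch.zeros(1, 5, 4), stack=True, pad_dims=2),
      DataContainer(torch.zeros(1, 7, 4), stack=True, pad_dims=2)]
batch = collate(gt, samples_per_gpu=2)
# batch.data[0] has shape (2, 1, 7, 4): the last two dims of each sample are
# padded up to the per-group maximum before default_collate stacks the group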