Example 1
import numpy as np
import torch

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.structures import BoxMode

from gulpio import GulpDirectory
from epic_kitchens.dataset.epic_dataset import EpicVideoDataset
from gulpio.transforms import Scale, CenterCrop, Compose, UnitNorm

from read_gulpio import EpicDataset

class_type = 'noun'
rgb_train = EpicVideoDataset('../../epic/data/processed/gulp/rgb_train',
                             class_type)
transforms = Compose([])
dataset = EpicDataset(transforms)
segment_uids = list(rgb_train.gulp_dir.merged_meta_dict.keys())
example_segment = rgb_train.video_segments[10]
example_frames = rgb_train.load_frames(example_segment)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)

for batch_num, (data, label) in enumerate(dataloader):
    # take the first clip of the batch and convert it to a NumPy array
    frame = data[0].to('cpu').detach().numpy().copy()
    # move the channel dimension last and drop the singleton dimension,
    # leaving an HWC image that detectron2 can consume
    frame = frame.transpose(1, 2, 3, 0)
    frame = np.squeeze(frame)
    break
im = frame
cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
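Example 1 stops after configuring the detector. A minimal continuation, following the standard detectron2 pattern (the CPU device and the RGB-to-BGR flip are assumptions about this setup, not part of the original snippet), loads the pretrained weights and runs the predictor on the extracted frame:

from detectron2.engine import DefaultPredictor

cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")
cfg.MODEL.DEVICE = 'cpu'  # assumption: no GPU available; delete this line to use CUDA
predictor = DefaultPredictor(cfg)

# DefaultPredictor expects a BGR image by default, while the gulp frames are
# assumed to be RGB, so reverse the channel order before inference.
im_bgr = np.ascontiguousarray(im[:, :, ::-1])
outputs = predictor(im_bgr)
print(outputs["instances"].pred_classes)
print(outputs["instances"].pred_boxes)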
Example 2
import os
import random

import numpy as np
import torch
from torch.utils import data

from epic_kitchens.dataset.epic_dataset import EpicVideoDataset


class epic_gulp(data.Dataset):
    def __init__(self,
                 mode='train',
                 transform=None,
                 seq_len=6,
                 num_seq=5,
                 downsample=3,
                 class_type='verb+noun'):
        self.mode = mode
        self.transform = transform
        self.seq_len = seq_len
        self.num_seq = num_seq
        self.downsample = downsample
        self.class_type = class_type
        gulp_root = '/proj/vondrick/datasets/epic-kitchens/data/processed/gulp'

        print(os.path.join(gulp_root, 'rgb_train', self.class_type))
        self.EpicDataset = EpicVideoDataset(
            os.path.join(gulp_root, 'rgb_train'), self.class_type)
        dataset = list(self.EpicDataset)
        rgb = []
        for i in range(len(dataset)):
            # remove segments that are too short
            if dataset[i].num_frames > self.seq_len * self.num_seq * self.downsample:
                rgb.append(dataset[i])
        del dataset
        # 80/20 train/validation split by random sampling of segment indices
        train_idx = random.sample(range(len(rgb)), int(len(rgb) * 0.8))
        rgb_train = []
        rgb_val = []
        for i in range(len(rgb)):
            if i in train_idx:
                rgb_train.append(rgb[i])
            else:
                rgb_val.append(rgb[i])
        if self.mode == 'train':
            self.video_info = rgb_train
        elif self.mode == 'val':
            self.video_info = rgb_val
        else:
            raise ValueError("mode must be 'train' or 'val'")

    def idx_sampler(self, index):
        vlen = self.video_info[index].num_frames
        if vlen - self.num_seq * self.seq_len * self.downsample <= 0:
            return None
        n = 1
        # random starting frame that leaves room for the full clip
        start_idx = np.random.choice(
            range(vlen - self.num_seq * self.seq_len * self.downsample), n)
        # first frame index of each of the num_seq sub-sequences
        seq_idx = np.expand_dims(np.arange(self.num_seq), -1) \
            * self.downsample * self.seq_len + start_idx
        # expand every sub-sequence into seq_len indices spaced downsample frames apart
        seq_idx_block = seq_idx + \
            np.expand_dims(np.arange(self.seq_len), 0) * self.downsample
        return seq_idx_block
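
    # Worked example: with the defaults num_seq=5, seq_len=6, downsample=3 and a
    # sampled start_idx of 10, idx_sampler returns the (5, 6) index block
    #   [[10, 13, 16, 19, 22, 25],
    #    [28, 31, 34, 37, 40, 43],
    #    ...
    #    [82, 85, 88, 91, 94, 97]]
    # i.e. five contiguous sub-sequences of six frames, taking every 3rd frame.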

    def __getitem__(self, index):
        idx_block = self.idx_sampler(index)
        assert idx_block.shape == (self.num_seq, self.seq_len)
        idx_block = idx_block.reshape(self.num_seq * self.seq_len)

        # load the whole segment once, then pick out the sampled frames
        segment = self.EpicDataset.load_frames(self.video_info[index])
        seq = [segment[i] for i in idx_block]

        # apply the same transform to every frame in the sequence
        t_seq = self.transform(seq)
        num_crop = None
        try:
            # single-crop transforms return a list of (C, H, W) tensors
            (C, H, W) = t_seq[0].size()
            t_seq = torch.stack(t_seq, 0)
        except AttributeError:
            # multi-crop transforms return five lists of per-frame tensors
            (C, H, W) = t_seq[0][0].size()
            tmp = [torch.stack(i, 0) for i in t_seq]
            assert len(tmp) == 5
            num_crop = 5
            t_seq = torch.stack(tmp, 1)
        t_seq = t_seq.view(self.num_seq, self.seq_len, C, H, W).transpose(1, 2)

        action = torch.LongTensor([self.video_info[index].verb_class])
        noun = torch.LongTensor([self.video_info[index].noun_class])

        # OLD: return sequence only
        # return t_seq, action, noun
        # NEW: return all useful information in a dictionary
        result = {
            't_seq': t_seq,          # (num_seq, C, seq_len, H, W)
            'idx_block': idx_block,  # flat array of the sampled frame indices
            'vpath': 'TODO, idk how to retrieve',
            'action': action,
            'noun': noun
        }
        return result

    def __len__(self):
        return len(self.video_info)
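
A minimal usage sketch for the epic_gulp class above; the torchvision transform pipeline, the batch size, and the assumption that load_frames yields PIL images are illustrative choices, not taken from the original code:

import torch
from torchvision import transforms

# assumption: load_frames yields PIL images, so per-frame torchvision transforms apply
frame_transform = transforms.Compose([
    transforms.Resize(128),
    transforms.CenterCrop(128),
    transforms.ToTensor(),
])

train_set = epic_gulp(mode='train',
                      transform=lambda seq: [frame_transform(img) for img in seq],
                      seq_len=6, num_seq=5, downsample=3,
                      class_type='verb+noun')
train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, shuffle=True)

batch = next(iter(train_loader))
print(batch['t_seq'].shape)   # (4, 5, 3, 6, 128, 128) = (B, num_seq, C, seq_len, H, W)
print(batch['action'].shape, batch['noun'].shape)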