Example #1
0
    def get_item(self, index, shift=None):
        """Load one frame, its multi-label class target, and metadata.

        When ``shift`` is None, the precomputed path/time for ``index`` is
        used; otherwise ``shift`` (a fraction of the video length) is scaled
        to a concrete frame number.
        """
        record = self.data['datas'][index]
        meta = {'do_not_collate': True}
        if shift is None:
            # Precomputed sample: stored image path and timestamp.
            path = self.data['image_paths'][index]
            shift = record['time']
            meta['time'] = shift
        else:
            # Map the fractional shift onto a frame index in [0, n-1].
            shift = int(shift * (record['n'] - 1))
            path = self.get_jpg_path(record['base'], record['id'], shift)
            meta['time'] = shift

        # {0,1} vector over classes whose annotated interval covers the
        # selected frame's timestamp.
        target = torch.IntTensor(self.num_classes).zero_()
        frame_time = shift / float(self.fps)
        for label in record['labels']:
            if label['start'] < frame_time < label['end']:
                target[self.cls2int[label['class']]] = 1

        meta['id'] = record['id']
        img = default_loader(path)
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return img, target, meta
Example #2
0
 def get_item(self, index, shift=None):
     """Load ``train_gap`` consecutive frames with per-frame label vectors.

     When ``shift`` is None a random temporal crop is drawn; otherwise
     ``shift`` is a fraction of the valid crop range. Returns
     (img, target, meta) where img is an (n, h, w, c) float array and
     target stacks one {0,1} class vector per frame.

     Fix: removed the temporary Timer-based per-frame timing prints
     (explicitly marked "Temp") and commented-out dead code that were
     left inside the frame-loading loop.
     """
     ims, tars, meta = [], [], {}
     meta['do_not_collate'] = True
     fps = 24  # assumes frames were extracted at 24 fps -- TODO confirm
     n = self.data['datas'][index]['n']
     if shift is None:
         # Random temporal crop for training.
         shift = np.random.randint(n - self.train_gap - 2)
     else:
         # Deterministic crop: shift is a fraction of the valid range.
         shift = int(shift * (n - self.train_gap - 2))
     resize = transforms.Resize(int(256. / 224 * self.input_size))
     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
     spacing = np.arange(shift, shift + self.train_gap)
     for loc in spacing:
         ii = int(np.floor(loc))
         # Frame files are 1-based: '{base}{frame:06d}.jpg'.
         path = '{}{:06d}.jpg'.format(self.data['datas'][index]['base'],
                                      ii + 1)
         try:
             img = default_loader(path)
         except Exception as e:
             print('failed to load image {}'.format(path))
             print(e)
             raise
         img = resize(img)
         img = transforms.ToTensor()(img)
         img = normalize(img)
         ims.append(img)
         # Per-frame multi-label target at the frame's timestamp.
         target = torch.IntTensor(self.num_classes).zero_()
         for x in self.data['datas'][index]['labels']:
             if x['start'] < ii / float(fps) < x['end']:
                 target[self.cls2int(x['class'])] = 1
         tars.append(target)
     meta['id'] = self.data['datas'][index]['id']
     meta['time'] = shift
     img = torch.stack(ims).permute(0, 2, 3, 1).numpy()  # n, h, w, c
     target = torch.stack(tars)
     if self.transform is not None:
         img = self.transform(img)
     if self.target_transform is not None:
         target = self.target_transform(target)
     # batch will be b x n x h x w x c
     # target will be b x n x nc
     return img, target, meta
    def __getitem__(self, index):
        """Load a clip and a person-cropped clip with per-frame labels.

        NOTE(review): this block appears truncated in this view -- no
        return statement is visible after the frame loop.

        Fix: L-era Python 2 syntax ``except Exception, e:`` is a
        SyntaxError under Python 3; changed to ``except Exception as e:``.
        """
        ims = []
        personims = []
        tars = []
        meta = {}
        fps = 24
        n = self.data['datas'][index]['n']
        # NOTE(review): self.data['datas'][index] looks like a dict (it is
        # indexed with string keys elsewhere), so hasattr() would never
        # find 'shift'; presumably "'shift' in ..." was intended -- TODO
        # confirm before changing behavior.
        if hasattr(self.data['datas'][index], 'shift'):
            print('using shift')
            shift = self.data['datas'][index]['shift']
        else:
            shift = np.random.randint(n-self.train_gap-2)

        resize = transforms.Resize((int(256./224*self.input_size), int(256./224*self.input_size)))
        spacing = np.arange(shift, shift+self.train_gap)
        if len(self.data['datas'][index]['persontimes']) > 0:
            # Use the person box whose timestamp is closest to the clip center.
            closest = np.argmin([abs(x-(shift+self.train_gap/2)) for x in self.data['datas'][index]['persontimes']])
            closest = self.data['datas'][index]['persontimes'][closest]
            with open(self.data['datas'][index]['personbase']+'{:06d}.txt'.format(closest), 'r') as f:
                box = [float(x) for x in f.readline().strip().split(' ')]
        else:
            print('no box, skipping cropping: {}'.format(self.data['datas'][index]['personbase']))
            box = []

        for loc in spacing:
            ii = int(np.floor(loc))
            path = '{}{:06d}.jpg'.format(self.data['datas'][index]['base'], ii+1)
            try:
                img = default_loader(path)
            except Exception as e:
                print('failed to load image {}'.format(path))
                print(e)
                raise
            if len(box) > 0:
                try:
                    personimg = _cropimg(img, *box[1:]).copy()
                    personimg = resize(personimg)
                    personimg = transforms.ToTensor()(personimg)
                    personimg = 2*personimg - 1
                    personims.append(personimg)
                except Exception as e:
                    # Cropping failed; fall back to uncropped frames.
                    print(e)
                    box = []
            img = resize(img)
            img = transforms.ToTensor()(img)
            img = 2*img - 1
            ims.append(img)
            if not len(box) > 0:
                personims.append(img)
            target = torch.IntTensor(self.num_classes).zero_()
            for x in self.data['datas'][index]['labels']:
                if x['start'] < ii/float(fps) < x['end']:
                    target[self.cls2int(x['class'])] = 1
            tars.append(target)
Example #4
0
 def _get_one_image(self, index, shift):
     """Return (img, target, meta) for frame ``shift`` of video ``index``.

     target is a {0,1} IntTensor marking every class whose annotated
     interval covers the frame's timestamp (assuming 24 fps).
     """
     fps = 24
     record = self.data['datas'][index]
     path = '{}{:06d}.jpg'.format(record['base'], shift+1)
     target = torch.IntTensor(self.num_classes).zero_()
     frame_time = shift/float(fps)
     for label in record['labels']:
         if label['start'] < frame_time < label['end']:
             target[self.cls2int(label['class'])] = 1
     meta = {'id': record['id'], 'time': shift}
     img = default_loader(path)
     if self.transform is not None:
         img = self.transform(img)
     if self.target_transform is not None:
         target = self.target_transform(target)
     return img, target, meta
Example #5
0
 def get_item(self, index, shift=None):
     """Fetch one frame, its multi-label target, and metadata.

     ``shift`` of None selects the precomputed frame for ``index``;
     otherwise it is a fraction that is scaled to a frame number.
     """
     meta = {}
     if shift is None:
         # Precomputed sample: stored path and timestamp.
         path = self.data['image_paths'][index]
         shift = self.data['times'][index]
         meta['time'] = shift
     else:
         # Fractional shift -> frame index in [0, n-1].
         shift = int(shift * (self.data['ns'][index] - 1))
         meta['time'] = shift
         # Drop the 10-char '######.jpg' suffix to recover the base path.
         base = self.data['image_paths'][index][:-10]
         path = '{}{:06d}.jpg'.format(base, shift + 1)
     # {0,1} vector over classes active at the frame's timestamp.
     target = torch.IntTensor(self.num_classes).zero_()
     frame_time = shift / float(self.fps)
     for label in self.data['labels'][index]:
         if label['start'] < frame_time < label['end']:
             target[self.cls2int(label['class'])] = 1
     meta['id'] = self.data['ids'][index]
     img = default_loader(path)
     if self.transform is not None:
         img = self.transform(img)
     if self.target_transform is not None:
         target = self.target_transform(target)
     return img, target, meta
Example #6
0
    def get_item(self, index, shift=None):
        """Return (ims, target, meta) for an ego/third-person frame triplet.

        ims holds [ego positive, third-person, ego negative] images;
        target is the stored alignment target for this sample.
        """
        # NOTE(review): meta aliases the stored dict, so the writes below
        # mutate self.data['meta'][index] in place -- confirm this caching
        # is intentional.
        meta = self.data['meta'][index]
        n = meta['n']          # third-person frame count
        n_ego = meta['n_ego']  # ego frame count
        if shift is None:
            shift = meta['thirdtime']
        else:
            # Fractional shift -> third-person frame index in [0, n-1].
            shift = int(shift * (n - 1))
        imbase, egobase = self.data['image_paths'][index]
        # Map the third-person time onto the ego timeline, then pick a
        # negative ego frame constrained by self.deltaneg.
        egoii = to_ego_time(shift, n, n_ego)
        negii = get_neg_time(egoii, n_ego, self.deltaneg)
        if negii is None:
            raise Exception('deltaneg too big for video')
        meta['firsttime_pos'] = egoii
        meta['thirdtime'] = shift
        meta['firsttime_neg'] = negii
        # Frame files are 1-based: '{base}{frame:06d}.jpg'.
        egopath_pos = '{}{:06d}.jpg'.format(egobase, egoii + 1)
        impath = '{}{:06d}.jpg'.format(imbase, shift + 1)
        egopath_neg = '{}{:06d}.jpg'.format(egobase, negii + 1)
        impaths = (egopath_pos, impath, egopath_neg)

        target = self.data['targets'][index]
        # Multi-label class vector at the third-person frame's timestamp.
        classtarget = torch.IntTensor(self.num_classes).zero_()
        for x in self.data['labels'][index]:
            if x['start'] < shift / float(self.fps) < x['end']:
                classtarget[self.cls2int(x['class'])] = 1
        meta['class_target'] = classtarget

        meta['id'] = self.data['ids'][index]
        ims = [default_loader(im) for im in impaths]
        if self.transform is not None:
            ims = [self.transform(im) for im in ims]
        if self.target_transform is not None:
            target = self.target_transform(target)
        #if random() > 0.5:  # TODO DEBUG
        #    ims[2], ims[0] = ims[0], ims[2]
        #    target = -1
        return ims, target, meta
Example #7
0
 def _process_stack(self, base, shift, data):
     """Build a normalized stack of ``train_gap`` frames starting at ``shift``.

     Returns (img, target, meta): img is an (n, h, w, c) float array,
     target stacks one {0,1} class vector per frame.
     """
     frames, targets, meta = [], [], {}
     meta['do_not_collate'] = True
     # During training, use the larger resize scale half the time.
     if self.split == 'train' and np.random.random() > 0.5:
         scale = 320.
     else:
         scale = 256.
     resize = transforms.Resize(int(scale / 224 * self.input_size))
     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
     for loc in np.arange(shift, shift + self.train_gap):
         ii = int(np.floor(loc))
         # Frame files are 1-based: '{base}{frame:06d}.jpg'.
         path = '{}{:06d}.jpg'.format(base, ii + 1)
         try:
             frame = default_loader(path)
         except Exception as e:
             print('failed to load image {}'.format(path))
             print(e)
             raise
         frame = normalize(transforms.ToTensor()(resize(frame)))
         frames.append(frame)
         # Per-frame multi-label target at the frame's timestamp.
         vec = torch.IntTensor(self.num_classes).zero_()
         frame_time = ii / float(self.fps)
         for label in data['labels']:
             if label['start'] < frame_time < label['end']:
                 vec[self.cls2int(label['class'])] = 1
         targets.append(vec)
     meta['id'] = data['id']
     meta['time'] = shift
     img = torch.stack(frames).permute(0, 2, 3, 1).numpy()  # n, h, w, c
     target = torch.stack(targets)
     if self.transform is not None:
         img = self.transform(img)
     if self.target_transform is not None:
         target = self.target_transform(target)
     return img, target, meta
def read_one_iamge(path):
    """Load and return a single image from ``path`` via default_loader.

    NOTE(review): the function name misspells 'image'; kept as-is since
    external callers may depend on it -- TODO rename with a deprecation
    alias.
    """
    # path = '/home/nfs/x.chang/Datasets/Kinetics400/train_frms/lunge/Ku56XQb3N40_000004_000014/00000000.jpg'
    return default_loader(path)
""" Video loader for the Charades dataset """

from datasets import utils
from misc_utils.utils import Timer

# Micro-benchmark: repeatedly load one Charades frame from disk and print
# the per-read latency.
# path = '/home/SERILOCAL/xiatian.zhu/Data/test_video/ZZXQF-000002.jpg'
path = '/home/nfs/x.chang/Datasets/Charades/Charades/Charades_v1_rgb/ZZXQF/ZZXQF-000002.jpg'

for _ in range(10):
    try:
        # ============ Temp ===================
        timer = Timer()
        img = utils.default_loader(path)
        # ============ Temp ===================
        load_img_cost = timer.thetime() - timer.end
        timer.tic()
        print('Load image from disk: {0:.3f} sec'.format(load_img_cost))
    except Exception as e:
        print('failed to load image {}'.format(path))
        print(e)
        raise