Example no. 1
    def __getitem__(self, index):

        if self._representation == 'mv':
            representation_idx = 1
        elif self._representation == 'residual':
            representation_idx = 2
        else:
            representation_idx = 0


        if self._is_train:
            video_path, label, num_frames = random.choice(self._video_list)
        else:
            video_path, label, num_frames = self._video_list[index]

        frames = []
        for seg in range(self._num_segments):

            if self._is_train:
                gop_index, gop_pos = self._get_train_frame_index(num_frames, seg)
            else:
                gop_index, gop_pos = self._get_test_frame_index(num_frames, seg)

            img = load(video_path, gop_index, gop_pos,
                       representation_idx, self._accumulate)

            if img is None:
                print('Error: loading video %s failed.' % video_path)
                img = np.zeros((256, 256, 2)) if self._representation == 'mv' else np.zeros((256, 256, 3))
            else:
                if self._representation == 'mv':
                    img = clip_and_scale(img, 20)
                    img += 128
                    img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)
                elif self._representation == 'residual':
                    img += 128
                    img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)

            if self._representation == 'iframe':
                img = color_aug(img)

                # BGR to RGB. (PyTorch uses RGB according to doc.)
                img = img[..., ::-1]

            frames.append(img)

        frames = self._transform(frames)

        frames = np.array(frames)
        frames = np.transpose(frames, (0, 3, 1, 2))
        input = torch.from_numpy(frames).float() / 255.0

        if self._representation == 'iframe':
            input = (input - self._input_mean) / self._input_std
        elif self._representation == 'residual':
            input = (input - 0.5) / self._input_std
        elif self._representation == 'mv':
            input = (input - 0.5)

        return input, label
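The clip_and_scale helper used above is not defined in these examples. A minimal sketch of a compatible implementation, assuming (per the comment in Example no. 6, "scale values from +-20 to +-127.5") that it linearly rescales motion-vector values so the subsequent +128 shift lands them in [0, 255]:

def clip_and_scale(img, max_val):
    # Hypothetical implementation: rescale from [-max_val, max_val]
    # to [-127.5, 127.5]; the caller then adds 128 and clamps to [0, 255].
    return img * (127.5 / max_val)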
Example no. 2
def load_segment(is_train, num_frames, seg, representation, num_segments,
                 video_path, representation_idx, accumulate):
    if is_train:
        gop_index, gop_pos = get_train_frame_index(num_frames, seg,
                                                   representation,
                                                   num_segments)
    else:
        gop_index, gop_pos = get_test_frame_index(num_frames, seg,
                                                  representation, num_segments)

    img = load(video_path, gop_index, gop_pos, representation_idx, accumulate)

    if img is None:
        print('Error: loading video %s failed.' % video_path)
        img = np.zeros((256, 256, 2)) if representation == 'mv' else np.zeros(
            (256, 256, 3))
    else:
        if representation == 'mv':
            img = clip_and_scale(img, 20)
            img += 128
            img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)
        elif representation == 'residual':
            img += 128
            img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)

    if representation == 'iframe':
        if is_train:
            img = color_aug(img)

        # BGR to RGB. (PyTorch uses RGB according to doc.)
        img = img[..., ::-1]
    return img
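A hedged usage sketch for load_segment, assuming the segment-sampling setup of Example no. 1 (all argument values below are illustrative, not taken from the source):

# Hypothetical driver: fetch one motion-vector frame per segment.
num_segments = 3
frames = [
    load_segment(is_train=True, num_frames=300, seg=seg,
                 representation='mv', num_segments=num_segments,
                 video_path='input.mp4', representation_idx=1,
                 accumulate=True)
    for seg in range(num_segments)
]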
Example no. 3
    def process_segment_consecutive(self, frames, gop_index, gop_pos,
                                    video_path, representation_idx):
        # if self._is_train:
        #     gop_index, gop_pos = self._get_train_frame_index(num_frames, seg)
        # else:
        #     gop_index, gop_pos = self._get_test_frame_index(num_frames, seg)
        # Returns the image of the specified frame.
        img = load(video_path, gop_index, gop_pos,
                   representation_idx, self._accumulate)
        if img is None:
            print('Error: loading video %s failed.' % video_path)
            img = np.zeros((256, 256, 2)) if self._representation == 'mv' else np.zeros((256, 256, 3))
        else:
            if self._representation == 'mv':
                img = clip_and_scale(img, 20)
                img += 128
                img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)
            elif self._representation == 'residual':
                img += 128
                img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)
        if self._representation == 'iframe':
            img = color_aug(img)
            # BGR to RGB. (PyTorch uses RGB according to doc.)
            img = img[..., ::-1]
        frames.append(img)
        return frames
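The load call used throughout these examples reads a frame representation directly from a compressed video; a minimal usage sketch based on the comment in Example no. 8 (the coviar import path is an assumption about the CoViAR data-loader extension):

from coviar import load  # assumption: CoViAR's compressed-video reader

# load(video_path, gop_index, frame_in_gop, representation_idx, accumulate)
# representation_idx: 0 = I-frame, 1 = motion vectors, 2 = residual.
mv = load('input.mp4', 3, 8, 1, True)  # accumulated MVs of frame 9 in GOP 4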
Example no. 4
    def __getitem__(self, index):

        video_path = self.data['video_path'][index]
        gop_index = self.data['gop_index'][index]
        target = self.data['targets'][index]
        # print(video_path, gop_index, target)

        if self._representation == 'iframe':
            frames_i = []
            img_i = load(video_path, gop_index, 0, 0, self._accumulate)
            img_i = color_aug(img_i)
            img_i = img_i[..., ::-1]
            frames_i.append(img_i)
            frames_i = self._transform(frames_i)
            frames_i = np.array(frames_i)
            frames_i = np.transpose(frames_i, (0, 3, 1, 2))
            input_i = torch.from_numpy(frames_i).float() / 255.0
            input_i = (input_i - self._input_mean) / self._input_std
            input = input_i


        if self._representation == 'mv':
            frames_m = []
            img_m = load(video_path, gop_index, 6, 1, self._accumulate)
            img_m = clip_and_scale(img_m, 20)
            img_m += 128
            img_m = (np.minimum(np.maximum(img_m, 0), 255)).astype(np.uint8)
            frames_m.append(img_m)
            frames_m = self._transform(frames_m)
            frames_m = np.array(frames_m)
            frames_m = np.transpose(frames_m, (0, 3, 1, 2))
            input_m = torch.from_numpy(frames_m).float() / 255.0
            input_m = (input_m - 0.5)
            input = input_m


        if self._representation == 'r':
            frames_r = []
            img_r = load(video_path, gop_index, 6, 2, self._accumulate)
            img_r += 128
            img_r = (np.minimum(np.maximum(img_r, 0), 255)).astype(np.uint8)
            frames_r.append(img_r)
            frames_r = self._transform(frames_r)
            frames_r = np.array(frames_r)
            frames_r = np.transpose(frames_r, (0, 3, 1, 2))
            input_r = torch.from_numpy(frames_r).float() / 255.0
            input_r = (input_r - 0.5) / self._input_std
            input = input_r

        # print(input.shape)
        # target = target.long()
        # print(target.shape)
        return input, target
Example no. 5
    def __getitem__(self, index):

        frames = []
        tmp = self._frames[index]
        for _, img in enumerate(tmp):
            if img is None:
                print('Error: loading video failed.')
                img = np.zeros((256, 256, 2)) if self._representation == 'mv' else np.zeros((256, 256, 3))
            else:
                if self._representation == 'mv':
                    img = clip_and_scale(img, 20)
                    img += 128
                    img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)
                elif self._representation == 'residual':
                    img += 128
                    img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)

            if self._representation == 'iframe':
                img = color_aug(img)
                # BGR to RGB. (PyTorch uses RGB according to doc.)
                img = img[..., ::-1]

            frames.append(img)

        frames = self._transform(frames)

        frames = np.array(frames)
        frames = np.transpose(frames, (0, 3, 1, 2))
        input = torch.from_numpy(frames).float() / 255.0

        if self._representation == 'iframe':
            input = (input - self._input_mean) / self._input_std
        elif self._representation == 'residual':
            input = (input - 0.5) / self._input_std
        elif self._representation == 'mv':
            input = (input - 0.5)

        return input
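The _input_mean and _input_std tensors used for normalization must broadcast against the (num_segments, C, H, W) batches built above; a minimal sketch of a typical construction (the ImageNet statistics are an assumption, not taken from these examples):

import torch

# Hypothetical setup: shape (1, 3, 1, 1) broadcasts over (num_segments, 3, H, W).
_input_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
_input_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)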
Example no. 6
    def __getitem__(self, index):

        if self._representation == 'mv':
            representation_idx = 1
        elif self._representation == 'residual':
            representation_idx = 2
        else:
            representation_idx = 0

        if self._is_train:
            video_path, label, num_frames = random.choice(self._video_list)
        else:
            video_path, label, num_frames = self._video_list[index]

        frames = []
        idx_first = -99999
        for seg in range(self._num_segments):

            if self._is_train:
                gop_index, gop_pos = self._get_train_frame_index(
                    num_frames, seg)
            else:
                gop_index, gop_pos = self._get_test_frame_index(
                    num_frames, seg)

            flow_path = video_path_to_flow_path(self._flow_root, video_path)
            if self._flow_folder == 'tvl1':
                flow_tmpl = 'flow_{0}_{1:05d}.jpg'
            else:
                raise ValueError('Unsupported flow folder: %s' % self._flow_folder)
            idx = gop_index * GOP_SIZE + gop_pos + 1
            if idx_first == -99999:
                idx_first = idx
            # read the corresponding pre-computed optical flow along x and y dimension
            x_img = np.array(
                Image.open(os.path.join(flow_path,
                                        flow_tmpl.format('x',
                                                         idx))).convert('L'))
            y_img = np.array(
                Image.open(os.path.join(flow_path,
                                        flow_tmpl.format('y',
                                                         idx))).convert('L'))
            # Image.open raises on failure, so flow cannot be None here.
            flow = np.stack([x_img, y_img], axis=-1)

            # load MV and data pre-processing
            mv = load(video_path, gop_index, gop_pos, representation_idx,
                      self._accumulate)

            if mv is None:
                print('Error: loading video %s failed.' % video_path)
                mv = np.zeros(
                    (256, 256,
                     2)) if self._representation == 'mv' else np.zeros(
                         (256, 256, 3))
            else:
                if self._representation == 'mv':
                    if self._mv_minmaxnorm == 1:
                        mv = clip_and_scale(
                            mv, 20)  # scale values from +-20 to +-127.5
                    mv += 128
                    mv = (np.minimum(np.maximum(mv, 0), 255)).astype(np.uint8)
                elif self._representation == 'residual':
                    mv += 128
                    mv = (np.minimum(np.maximum(mv, 0), 255)).astype(np.uint8)

            if self._representation == 'iframe':
                mv = color_aug(mv)

                # BGR to RGB. (PyTorch uses RGB according to doc.)
                mv = mv[..., ::-1]

            # load residual and data pre-processing
            residual = load(video_path, gop_index, gop_pos, 2,
                            self._accumulate)
            residual += 128
            residual = (np.minimum(np.maximum(residual, 0),
                                   255)).astype(np.uint8)

            frames.append(np.concatenate((flow, mv, residual), axis=2))

        frames = self._transform(frames)

        frames = np.array(frames)
        frames = np.transpose(frames, (0, 3, 1, 2))
        # print('frames shape in dataloader:')
        # print(frames.shape)  # (num_crops*num_segments, 5, 224, 224)

        # split frames into flow, MV, and residual channels
        input_flow = frames[:, 0:2, :, :]
        input_mv = frames[:, 2:4, :, :]
        input_residual = frames[:, 4:, :, :]

        if self._flow_ds_factor != 0:
            # downsample to make OF blocky
            factor = self._flow_ds_factor
            w_max = input_flow.shape[2]
            h_max = input_flow.shape[3]
            input_flow = block_reduce(input_flow,
                                      block_size=(1, 1, factor, factor),
                                      func=np.mean)
            # resize to original size by repeating or interpolation
            if not self._upsample_interp:
                input_flow = input_flow.repeat(factor, axis=2).repeat(factor,
                                                                      axis=3)
            else:
                # Interpolate one spatial axis at a time (interp1d supports an axis argument).
                w_max_ds = input_flow.shape[2]
                h_max_ds = input_flow.shape[3]
                f_out = interpolate.interp1d(np.linspace(0, 1, w_max_ds),
                                             input_flow,
                                             kind='linear',
                                             axis=2)
                input_flow = f_out(np.linspace(0, 1, w_max_ds * factor))
                f_out = interpolate.interp1d(np.linspace(0, 1, h_max_ds),
                                             input_flow,
                                             kind='linear',
                                             axis=3)
                input_flow = f_out(np.linspace(0, 1, h_max_ds * factor))
            input_flow = input_flow[:, :, :w_max, :h_max]
        """load data from numpy to torch and pre-processing"""
        # print('input_flow shape in dataloader:')
        # print(input_flow.shape)  # (num_crops*num_segments, 2, 224, 224)
        input_flow = torch.from_numpy(input_flow).float() / 255.0
        input_mv = torch.from_numpy(input_mv).float() / 255.0
        input_residual = torch.from_numpy(input_residual).float() / 255.0
        # print('input_flow after torch shape in dataloader:')
        # print(input_flow.shape)  # torch.Size([num_crops*num_segments, 2, 224, 224])

        if self._representation == 'iframe':
            input_mv = (input_mv - self._input_mean) / self._input_std
        elif self._representation == 'mv':
            input_mv = (input_mv - 0.5) / torch.mean(self._input_std)

        input_flow = (input_flow - 0.5) / torch.mean(self._input_std)
        input_residual = (input_residual - 0.5) / self._input_std

        # print('Input flow shape %s:' % str(input_flow.shape))  # torch.Size([1, num_crops*num_segments, 2, 224, 224])
        # print('Input mv shape %s:' % str(input_mv.shape))
        # print('Input residual shape %s:' % str(input_residual.shape))
        # print('Input mv scope min %s:' % str(input_mv.min()))
        # print('Input mv scope max %s:' % str(input_mv.max()))
        # print('Input flow scope min %s:' % str(input_flow.min()))
        # print('Input flow scope max %s:' % str(input_flow.max()))
        if self._viz and not self._is_train:
            classname = flow_path.split('/')[-2]
            img_tmpl = 'img_{:05d}.jpg'
            # idx_first is the frame index of the first sampled segment; idx is the last.
            return input_flow, input_mv, input_residual, label, os.path.join(
                flow_path, flow_tmpl.format('x', idx)), os.path.join(
                    flow_path, flow_tmpl.format('y', idx)), os.path.join(
                        flow_path, img_tmpl.format(idx_first)), classname
        else:
            return input_flow, input_mv, input_residual, label
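The blocky-flow step in Example no. 6 averages each factor x factor spatial block of the optical flow and then re-expands it to the original resolution; a self-contained sketch of the repeat-based branch, assuming scikit-image's block_reduce:

import numpy as np
from skimage.measure import block_reduce

flow = np.random.rand(1, 2, 224, 224).astype(np.float32)
factor = 16
# Average over factor x factor spatial blocks (block_reduce pads if needed)...
blocky = block_reduce(flow, block_size=(1, 1, factor, factor), func=np.mean)
# ...then repeat each averaged value to restore the original resolution.
blocky = blocky.repeat(factor, axis=2).repeat(factor, axis=3)[:, :, :224, :224]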
Example no. 7
    def __getitem__(self, index):

        if self._is_train:
            video_path, label, num_frames = random.choice(self._video_list)
        else:
            video_path, label, num_frames = self._video_list[index]

        num_gop = num_frames // GOP_SIZE

        for gop in range(num_gop):

            frames_i = []
            frames_m = []
            frames_r = []

            img_i = load(video_path, gop, 0, 0, self._accumulate)
            img_i = color_aug(img_i)
            img_i = img_i[..., ::-1]

            img_m = load(video_path, gop, 6, 1, self._accumulate)
            img_m = clip_and_scale(img_m, 20)
            img_m += 128
            img_m = (np.minimum(np.maximum(img_m, 0), 255)).astype(np.uint8)

            img_r = load(video_path, gop, 6, 2, self._accumulate)
            img_r += 128
            img_r = (np.minimum(np.maximum(img_r, 0), 255)).astype(np.uint8)

            frames_i.append(img_i)
            frames_m.append(img_m)
            frames_r.append(img_r)

            frames_i = self._transform_i(frames_i)
            frames_m = self._transform_m(frames_m)
            frames_r = self._transform_r(frames_r)

            frames_i = np.array(frames_i)
            frames_m = np.array(frames_m)
            frames_r = np.array(frames_r)
            frames_i = np.transpose(frames_i, (0, 3, 1, 2))
            frames_m = np.transpose(frames_m, (0, 3, 1, 2))
            frames_r = np.transpose(frames_r, (0, 3, 1, 2))
            input_i = torch.from_numpy(frames_i).float() / 255.0
            input_m = torch.from_numpy(frames_m).float() / 255.0
            input_r = torch.from_numpy(frames_r).float() / 255.0

            input_i = (input_i - self._input_mean) / self._input_std
            input_m = (input_m - 0.5)
            input_r = (input_r - 0.5) / self._input_std

            # print(input_i.shape)
            # a=input_i.view((-1, ) + input_i.size()[-3:])
            # print(a.shape)
            # print(input_m.shape)
            # print(input_r.shape)

            input1 = torch.cat((input_i, input_m, input_r), 1)

            # print(input1.shape)

            if gop == 0:
                input = input1
            else:
                input = torch.cat((input, input1), 0)
        # print(input.shape)
        # a=input.view((-1, ) + input.size()[-3:])
        # print(a.shape)
        # print(input)
        return input, label
Example no. 8
    def __getitem__(self, index):
        # Siamese label: 0 means same class, 1 means different class.
        siamese_label = 0

        if self._representation == 'mv':
            representation_idx = 1
        elif self._representation == 'residual':
            representation_idx = 2
        else:
            representation_idx = 0
        index_1 = 0
        index_2 = 0
        if self._is_train:
            # Construct one positive or negative sample pair.
            index_1 = random.randint(0, self._size - 1)
            index_2 = 0
            if index % 2 == 0:
                # construct a POSITIVE pair
                indices = np.squeeze(
                    np.argwhere(
                        self._labels_list == self._labels_list[index_1]))
                index_2 = random.choice(indices)
                siamese_label = 0
            else:
                # construct a NEG pair
                indices = np.squeeze(
                    np.argwhere(
                        self._labels_list != self._labels_list[index_1]))
                index_2 = random.choice(indices)
                siamese_label = 1
        else:
            # For evaluation we still want balanced pairs, but the generated
            # pairs must be identical on every run (hence the fixed seed below).
            index_1 = index
            index_2 = 0
            if index % 2 == 0:
                # construct a POSITIVE pair
                indices = np.squeeze(
                    np.argwhere(
                        self._labels_list == self._labels_list[index_1]))
                random.seed(index_1)
                index_2 = random.choice(indices)
                siamese_label = 0
            else:
                # construct a NEG pair
                indices = np.squeeze(
                    np.argwhere(
                        self._labels_list != self._labels_list[index_1]))
                random.seed(index_1)
                index_2 = random.choice(indices)
                siamese_label = 1

        frames_all = []
        # divide into segments, then fetch a frame in every seg
        for i in (index_1, index_2):
            frames_per_sample = []
            # Note: i must be cast to int; numpy integer indices caused bugs here.
            video_path = self._videos_list[int(i)]
            for seg in range(self._num_segments):

                if self._is_train:
                    gop_index, gop_pos = self._get_train_frame_index(
                        self._frames_list[i], seg)
                else:
                    gop_index, gop_pos = self._get_test_frame_index(
                        self._frames_list[i], seg)

                # load(input.mp4, 3, 8, 1, True)
                # returns the accumulated motion vectors of the 9th frame of the 4th GOP.
                img = load(video_path, gop_index, gop_pos, representation_idx,
                           self._accumulate)

                if img is None:
                    print('Error: loading video %s failed.' % video_path)
                    img = np.zeros(
                        (256, 256,
                         2)) if self._representation == 'mv' else np.zeros(
                             (256, 256, 3))
                else:
                    if self._representation == 'mv':
                        img = clip_and_scale(img, 20)
                        img += 128
                        img = (np.minimum(np.maximum(img, 0),
                                          255)).astype(np.uint8)
                    elif self._representation == 'residual':
                        img += 128
                        img = (np.minimum(np.maximum(img, 0),
                                          255)).astype(np.uint8)

                if self._representation == 'iframe':
                    img = color_aug(img)

                    # BGR to RGB. (PyTorch uses RGB according to doc.)
                    img = img[..., ::-1]

                frames_per_sample.append(img)
            frames_per_sample = self._transform(frames_per_sample)
            frames_per_sample = np.array(frames_per_sample)
            frames_per_sample = np.transpose(frames_per_sample, (0, 3, 1, 2))
            input = torch.from_numpy(frames_per_sample).float() / 255.0
            # For 'mv' with num_segments=3, input has shape torch.Size([3, 2, 224, 224]).
            if self._representation == 'iframe':
                input = (input - self._input_mean) / self._input_std
                # Note: _input_mean and _input_std are tensors, so input must be a tensor here.
            elif self._representation == 'residual':
                input = (input - 0.5) / self._input_std
            elif self._representation == 'mv':
                input = (input - 0.5)

            frames_all.append(input)

        return frames_all, siamese_label
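The pair-construction logic of Example no. 8 can be factored into one small helper; a hedged sketch (the function name and return convention are my own, not from the source):

import random
import numpy as np

def make_pair_index(labels, anchor, positive, seed=None):
    # Hypothetical helper: given a numpy array of labels, pick a partner
    # index with the same label (positive pair, siamese label 0) or a
    # different label (negative pair, siamese label 1).
    if seed is not None:
        random.seed(seed)  # deterministic pairs for validation
    mask = labels == labels[anchor] if positive else labels != labels[anchor]
    candidates = np.argwhere(mask).ravel()
    return int(random.choice(candidates)), 0 if positive else 1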
Example no. 9
    def __getitem__(self, index):

        if self._representation == 'mv':
            representation_idx = 1
        elif self._representation == 'residual':
            representation_idx = 2
        else:
            representation_idx = 0

        if self._is_train:
            video_path, label, num_frames = random.choice(self._video_list)
        else:
            video_path, label, num_frames = self._video_list[index]

        frames = []
        for seg in range(self._num_segments):

            if self._is_train:
                gop_index, gop_pos = self._get_train_frame_index(
                    num_frames, seg)
            else:
                gop_index, gop_pos = self._get_test_frame_index(
                    num_frames, seg)

            img = load(video_path, gop_index, gop_pos, representation_idx,
                       self._accumulate)

            if img is None:
                print('Error: loading video %s failed.' % video_path)
                img = np.zeros(
                    (256, 256,
                     2)) if self._representation == 'mv' else np.zeros(
                         (256, 256, 3))
            else:
                if self._representation == 'mv':
                    img = clip_and_scale(img, 20)
                    img += 128
                    img = (np.minimum(np.maximum(img, 0),
                                      255)).astype(np.uint8)
                elif self._representation == 'residual':
                    img += 128
                    img = (np.minimum(np.maximum(img, 0),
                                      255)).astype(np.uint8)

            if self._representation == 'iframe':
                img = color_aug(img)

                # BGR to RGB. (PyTorch uses RGB according to doc.)
                img = img[..., ::-1]

            frames.append(img)

        frames = self._transform(frames)

        frames = np.array(frames)
        frames = np.transpose(frames, (0, 3, 1, 2))
        input = torch.from_numpy(frames).float() / 255.0

        if self._representation == 'iframe':
            input = (input - self._input_mean) / self._input_std
        elif self._representation == 'residual':
            input = (input - 0.5) / self._input_std
        elif self._representation == 'mv':
            input = (input - 0.5)

        return input, label
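As in the other examples, these datasets plug directly into a PyTorch DataLoader; a minimal usage sketch (the batch size and worker count are illustrative):

import torch

# 'dataset' stands for an instance of any of the dataset classes above.
loader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True,
                                     num_workers=4, pin_memory=True)
for input, label in loader:
    # input: (batch, num_segments, C, H, W) after default collation
    break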