Python VideoReader Examples, decord.VideoReader Python Examples

Example #1

0

Show file

def extractSlides(videoPath):
    """Collect slide images from a video by sampling about one frame per second.

    Each sampled frame is hashed with ``imagehash.average_hash`` and compared
    with the previous sample.  Once a difference above ``DIFF_THRESHOLD`` has
    been seen, the first later sample whose difference from its predecessor is
    below the threshold is kept as a slide.

    Args:
        videoPath: Path-like object exposing ``as_posix()``.

    Returns:
        list: The kept frames as PIL images, in playback order.
    """
    print(f"Reading {videoPath.as_posix()}...")

    vr = VideoReader(videoPath.as_posix(), ctx=cpu(0))
    fps = vr.get_avg_fps()
    print(f"Successfully read. FPS: {fps}")

    # int(fps) truncates to 0 for videos slower than 1 FPS, and a zero step is
    # rejected by range(); fall back to visiting every frame in that case.
    step = max(1, int(fps))

    slides = []
    prevImageHash = None
    imageChanged = False

    for i in trange(0, len(vr), step):
        pilImage = Image.fromarray(vr[i].asnumpy())
        currentImageHash = imagehash.average_hash(pilImage)
        # The first sample has no predecessor, so its difference is zero.
        if prevImageHash is None:
            imageDiff = 0
        else:
            imageDiff = currentImageHash - prevImageHash
        prevImageHash = currentImageHash

        if imageChanged and imageDiff < DIFF_THRESHOLD:
            slides.append(pilImage)
            imageChanged = False

        if imageDiff > DIFF_THRESHOLD:
            imageChanged = True

    return slides

Example #2

0

Show file

File: test_benchmark_decord.py Project: alesanfra/iterframes

def test_whole_video(video_path):
    """Check that every frame yielded by ``read`` equals decord's next frame."""
    from decord import VideoReader

    decord_reader = VideoReader(video_path)
    for expected_frame in read(video_path):
        # Advance decord by one frame for each frame produced by ``read``.
        decord_array = decord_reader.next().asnumpy()
        np.testing.assert_equal(expected_frame, decord_array)

Example #3

0

Show file

    def loadvideo_decord(self, sample, sample_rate_scale=1):
        """Load video content using Decord.

        Args:
            sample: Path of the video, appended directly to ``self.data_path``.
            sample_rate_scale: Stride applied to the sampled frame indices in
                the non-test modes.

        Returns:
            The array from ``get_batch(...).asnumpy()`` for the sampled frame
            indices, or an empty list when the file is missing, smaller than
            1 KiB, or cannot be opened by Decord.
        """
        # pylint: disable=line-too-long
        fname = self.data_path + sample

        if not os.path.exists(fname):
            return []

        # avoid hanging issue (tiny files are skipped before decoding)
        file_size = os.path.getsize(fname)
        if file_size < 1 * 1024:
            print('SKIP: ', fname, " - ", file_size)
            return []
        try:
            if self.keep_aspect_ratio:
                vr = VideoReader(fname, num_threads=1, ctx=cpu(0))
            else:
                vr = VideoReader(fname,
                                 width=self.new_width,
                                 height=self.new_height,
                                 num_threads=1,
                                 ctx=cpu(0))
        except Exception:  # pylint: disable=broad-except
            # Best-effort loading: any decoder failure means "no frames", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print("video cannot be loaded by decord: ", fname)
            return []

        if self.mode == 'test':
            all_index = list(range(0, len(vr), self.frame_sample_rate))
            # Repeat the last index until at least clip_len indices exist.
            while len(all_index) < self.clip_len:
                all_index.append(all_index[-1])
            vr.seek(0)
            buffer = vr.get_batch(all_index).asnumpy()
            return buffer

        # handle temporal segments
        converted_len = int(self.clip_len * self.frame_sample_rate)
        seg_len = len(vr) // self.num_segment

        all_index = []
        for i in range(self.num_segment):
            if seg_len <= converted_len:
                # Segment too short for a full clip: spread indices over the
                # whole segment and pad with its last frame.
                index = np.linspace(0,
                                    seg_len,
                                    num=seg_len // self.frame_sample_rate)
                index = np.concatenate(
                    (index,
                     np.ones(self.clip_len - seg_len // self.frame_sample_rate)
                     * seg_len))
                index = np.clip(index, 0, seg_len - 1).astype(np.int64)
            else:
                # Random window of converted_len frames inside the segment.
                end_idx = np.random.randint(converted_len, seg_len)
                str_idx = end_idx - converted_len
                index = np.linspace(str_idx, end_idx, num=self.clip_len)
                index = np.clip(index, str_idx, end_idx - 1).astype(np.int64)
            # Shift segment-local indices to absolute frame positions.
            index = index + i * seg_len
            all_index.extend(list(index))

        all_index = all_index[::int(sample_rate_scale)]
        vr.seek(0)
        buffer = vr.get_batch(all_index).asnumpy()
        return buffer

Example #4

0

Show file

File: label_video.py Project: cconger/gameplay-classifer

def test_video(video_name):
    """loads the given video and feeds frames through the inference engine

    Results are cached as ``<cachedir>/<video basename without extension>.npy``;
    when that file already exists it is loaded and returned without decoding.

    Args:
        video_name: Path of the video file.

    Returns:
        numpy.ndarray: ``uint32`` array with four columns and one row per
        sampled frame, filled batch by batch from ``inferLocal``.
    """
    f = os.path.join(cachedir,
                     os.path.basename(os.path.splitext(video_name)[0]))
    if os.path.isfile(f + ".npy"):
        print(f"FOUND EXISTING CLASSIFICATIONS: {f}.npy")
        return np.load(f + ".npy")

    vr = VideoReader(video_name, ctx=cpu(0))

    total_frames = len(vr)
    print("video frames:", total_frames)
    decord.bridge.set_bridge('tensorflow')

    # Assuming 60 fps
    sample_rate = 60
    images_per_batch = 32
    samples = total_frames // sample_rate
    # Only whole batches are processed; trailing samples are dropped.
    batches = samples // images_per_batch

    persample = np.empty((batches * images_per_batch, 4), dtype=np.uint32)

    for i in range(batches):
        print("batch", i, "of", batches)
        # Create a collection of frame indexes at each sample rate within the batch
        first_sample = i * images_per_batch
        frameIdxs = [(first_sample + x) * sample_rate
                     for x in range(images_per_batch)]
        batch_frames = vr.get_batch(frameIdxs)

        res = inferLocal(frameIdxs, batch_frames)
        persample[first_sample:first_sample + images_per_batch, :] = res

    print("saving to", f)
    np.save(f, persample)
    return persample

Example #5

0

Show file

def extract_frames(video, hi_dir, hi_size, times):
    """Write one resized PNG thumbnail per requested time index.

    For each ``t`` in ``times`` the frame at ``framerate * 2 * (t + 1)`` is
    taken (clamped to the last frame) and saved as ``thumb-<t+1:04>.png`` in
    ``hi_dir``.

    Args:
        video: Path of the video file.
        hi_dir: Directory the thumbnails are written to.
        hi_size: ``(width, height)`` box the thumbnail is fitted into while
            keeping the aspect ratio of the coded frame size.
        times: Integer time indices.
    """
    info = get_video_info(video)
    w, h = info['coded_width'], info['coded_height']

    aspect_ratio = w / h
    if aspect_ratio > hi_size[0] / hi_size[1]:
        # Wide format
        wo, ho = hi_size[0], int(hi_size[0] // aspect_ratio)
    else:
        wo, ho = int(hi_size[1] * aspect_ratio), hi_size[1]

    framerate = int(info['nb_frames']) / float(info['duration'])

    times = list(times)
    vr = VideoReader(video, ctx=cpu(0))
    # Clamp through the public length rather than the private _num_frame.
    last_frame = len(vr) - 1
    nframes = [min(last_frame, int(framerate * (2 * (t + 1)))) for t in times]
    frames = vr.get_batch(nframes).asnumpy()

    for t, frame in zip(times, frames):
        # Reverse the channel order before handing the frame to OpenCV
        # (decord decodes to RGB while cv2.imwrite expects BGR).
        frame = frame[:, :, np.array([2, 1, 0])]
        assert frame.ndim == 3
        assert frame.shape[-1] == 3

        cv2.imwrite(os.path.join(hi_dir, f'thumb-{t+1:04}.png'),
                    cv2.resize(frame, (wo, ho)))

Example #6

0

Show file

    def worker_func(idx, data_queue, msg_queue, anno_lst):
        """Serve messages from ``msg_queue`` until ``'stop'`` is received.

        ``'new_epoch'`` decodes every annotation in the current list and puts
        a copy, extended with pickled resized frames under ``'Frames'``, on
        ``data_queue``.  A two-item message starting with ``'update'``
        replaces the annotation list.  Other messages are ignored.
        """
        while True:
            msg = msg_queue.get()

            if msg == 'stop':
                return

            if msg == 'new_epoch':
                for anno in anno_lst:
                    if Enable_Time_Log:
                        t1 = time.time()
                    enriched = dict(anno.items())
                    reader = VideoReader(anno['Video'], ctx=cpu(idx))
                    h, w, _ = Cfg.input_frame_shape

                    decoded = reader.get_batch(anno['FrameIDs']).asnumpy()
                    pickled_frames = []
                    for img in decoded:
                        # Reverse the channel axis, then resize to (w, h).
                        resized = cv2.resize(img[:, :, ::-1], (w, h))
                        pickled_frames.append(pickle.dumps(resized))
                    enriched['Frames'] = pickled_frames
                    data_queue.put(enriched)
                    if Enable_Time_Log:
                        t2 = time.time()
                        print('Decord reader takes {:.3f}s'.format(t2 - t1))
                continue

            if len(msg) == 2 and msg[0] == 'update':
                anno_lst = msg[1]

Example #7

0

Show file

File: test_decord.py Project: ChaokunChang/SVAS

 def __init__(self,
              video_file,
              img_size=(416, 416),
              gpu=None,
              num_threads=8,
              offset=0,
              is_torch=True):
     """Open ``video_file`` with decord at a fixed output size.

     Args:
         video_file: Video source handed to ``VideoReader``.
         img_size: ``(width, height)`` pair, or a single value used for both.
         gpu: GPU id for ``decord.gpu``; ``None`` decodes on the CPU.
         num_threads: Forwarded to ``VideoReader``.
         offset: Stored on the instance as ``self.offset``.
         is_torch: When true, switch decord's bridge to ``'torch'``.
     """
     self.is_torch = is_torch
     if is_torch:
         decord.bridge.set_bridge('torch')
     # Normalise the size once so the stored attribute and the reader agree;
     # isinstance also accepts lists and tuple subclasses.
     if not isinstance(img_size, (tuple, list)):
         img_size = (img_size, img_size)
     self.img_size = tuple(img_size)
     self.offset = offset
     if gpu is None:
         ctx = decord.cpu()
     else:
         ctx = decord.gpu(gpu)
     self._vr = VideoReader(video_file,
                            ctx=ctx,
                            width=self.img_size[0],
                            height=self.img_size[1],
                            num_threads=num_threads)

Example #8

0

Show file

File: datasets.py Project: Katou2/action_recognition_online

def get_train_clip(opts, video_path):
    """
        Chooses a random clip from a video for training/ validation
        Args:
            opts        : config options (``sample_duration`` and ``modality`` are read)
            video_path  : path of the video file to sample from
        Returns:
            torch.Tensor : float32 clip of ``sample_duration`` frames, with the
                           decoded axes reordered by ``transpose(3, 0, 1, 2)``
        """
    # width=-1 / height=-1 leave the decoded size unchanged.
    vr = VideoReader(video_path, width=-1, height=-1)
    total_frames = len(vr)

    if total_frames > 300:
        # Long video: spread sample_duration frames evenly over a random window.
        interval = int(total_frames / (300 / opts.sample_duration))
        s_frame = np.random.randint(0, total_frames - interval)
        # np.int was removed in NumPy 1.24; the builtin int is its replacement.
        f_stamp = list(np.linspace(s_frame, s_frame + interval,
                                   opts.sample_duration).astype(int))
        clip = vr.get_batch(f_stamp).asnumpy()
        return torch.from_numpy(clip.transpose(3, 0, 1, 2).astype(np.float32))

    clip = []
    i = 0
    # choosing a random frame
    if total_frames <= opts.sample_duration:
        # Too short for a full clip: start at 0 and wrap around.
        loop = True
        start_frame = 0
    else:
        loop = False
        start_frame = np.random.randint(0, total_frames - opts.sample_duration)

    if opts.modality == 'RGB':
        while len(clip) < opts.sample_duration:
            clip.append(vr.get_batch([start_frame + i]).asnumpy()[0])
            i += 1

            if loop and i == total_frames:
                i = 0

    return torch.from_numpy(np.array(clip, dtype=np.float32).transpose(3, 0, 1, 2))

Example #9

0

Show file

    def load_video(self, path):
        '''
        Open a video with decord and keep the reader on the instance.

        References:
            https://github.com/dmlc/decord#installation
            https://github.com/dmlc/decord/blob/master/examples/video_reader.ipynb

        Args:
            path: the path to the video file

        Returns:
            None; the reader is stored in ``self.vr``.
        '''
        # Frames are requested at 320x240 and decoded on cpu(0).
        reader = VideoReader(path, width=320, height=240, ctx=cpu(0))
        self.vr = reader

Example #10

0

Show file

    def __getitem__(self, idx):
        """Return ``(frames, path)`` for the video at ``idx``.

        A negative ``idx`` yields a ``(1, 1, 1, 1)`` zero tensor together with
        ``self.flattened_data_dir[idx]``.  A video that cannot be opened, or
        whose frame count does not exceed ``self.temporal_depth``, yields a
        ``(901, 1, 1, 1)`` zero tensor and ``-1``.
        """
        if idx < 0:
            return torch.zeros(1, 1, 1, 1), self.flattened_data_dir[idx]

        vid = None
        # deal with corrupted videos in list or videos that are too short
        try:
            vid = VideoReader(self.flattened_data_dir[idx])
            usable = int(len(vid)) > self.temporal_depth
        except Exception:
            # Any decoder failure marks the entry unusable; interrupts still
            # propagate.
            usable = False

        if not usable:
            del vid
            gc.collect()
            return torch.zeros(901, 1, 1, 1), -1

        frames = self.transform(vid, self.split)

        # Release the reader promptly before returning.
        del vid
        gc.collect()
        return frames, self.flattened_data_dir[idx]

Example #11

0

Show file

File: valid_loader.py Project: Alterith/masters_code

    def __getitem__(self, idx):
        """Return ``(frames, cls)`` for a usable video.

        The video at ``idx`` is tried first.  If it cannot be opened or
        probed, or its frame count does not exceed ``self.temporal_depth``, a
        random index is drawn instead; this repeats until a video is accepted.
        """
        vid = None
        cls = None
        # deal with corrupted videos in list
        while True:
            try:
                vid = VideoReader(self.flattened_data_dir[idx])
                cls = self.idx_per_file[idx]
                # Decode one frame as a probe; a corrupted file raises here.
                vid[1]
                usable = int(len(vid)) > self.temporal_depth
            except Exception:
                # Unlike a bare except, Ctrl-C can still break out of the
                # retry loop.
                usable = False

            if usable:
                break
            idx = random.randint(0, len(self.flattened_data_dir) - 1)

        frames = self.transform(vid, self.split)
        return frames, cls

Example #12

0

Show file

    def get(self, record, indices, path):
        """Load the images for ``indices``, transform them and attach a label.

        Without a video source, every index is converted to ``int`` and
        resolved through ``self._load_image(path, index)``.  With a video
        source, frame ``index - 1`` is decoded from ``record.path`` under
        ``self.root_path``, falling back to frame 0 when that fails.

        Returns:
            tuple: ``(self.transform(images), label)``, where the label is
            ``record.mlabel`` if ``self.multi_class`` else ``record.label``.
        """
        images = []
        if self.video_source:
            video_file = os.path.join(self.root_path, record.path)
            vr = VideoReader(video_file, ctx=cpu(0))
            for seg_ind in indices:
                try:
                    images.append(Image.fromarray(vr[seg_ind - 1].asnumpy()))
                except Exception:
                    # Substitute the first frame when the index is unusable.
                    images.append(Image.fromarray(vr[0].asnumpy()))
        else:
            for seg_ind in indices:
                images.extend(self._load_image(path, int(seg_ind)))

        process_data = self.transform(images)
        label = record.mlabel if self.multi_class else record.label
        return process_data, label

Example #13

0

Show file

File: detect_and_crop_on_videos_no_dataloader.py Project: ternaus/Pytorch_Retinaface

def video_reader(*args, **kwds):
    """Yield a ``VideoReader`` built from the given arguments.

    The local reference to the reader is dropped once the generator is
    resumed or closed.
    """
    reader = VideoReader(*args, **kwds)
    try:
        yield reader
    finally:
        # Drop this frame's reference to the reader.
        del reader

Example #14

0

Show file

def test_bytes_io():
    """Decode from an open binary file object and compare with the default test video."""
    here = os.path.dirname(__file__)
    fn = os.path.abspath(
        os.path.join(here, '..', '..', '..', 'examples', 'flipping_a_pancake.mkv'))
    with open(fn, 'rb') as f:
        vr = VideoReader(f)
        assert len(vr) == 310
        vr2 = _get_default_test_video()
        from_stream = vr[10].asnumpy().astype('float')
        reference = vr2[10].asnumpy().astype('float')
        # average pixel diff < 2
        assert np.mean(np.abs(from_stream - reference)) < 2

Example #15

0

Show file

def _get_rotated_test_video(rot, height=-1, width=-1, ctx=CTX):
    """Open ``video_<rot>.mov`` from the ``test_data`` directory two levels up."""
    here = os.path.dirname(__file__)
    fixture = os.path.abspath(
        os.path.join(here, '..', '..', 'test_data', f'video_{rot}.mov'))
    return VideoReader(fixture, height=height, width=width, ctx=ctx)

Example #16

0

Show file

 def frames(self):
     """Decode every frame of ``self._path`` into a list of RGB PIL images."""
     reader = VideoReader(self._path)
     to_pil = torchvision.transforms.ToPILImage(mode='RGB')
     # Each frame has its last axis moved to the front before conversion.
     return [
         to_pil(reader[pos].permute(2, 0, 1)).convert('RGB')
         for pos in range(len(reader))
     ]

Example #17

0

Show file

def get_decord(path):
    """Decode every frame of ``path`` with decord and print how many were read."""
    reader = VideoReader(path, ctx=cpu(0))
    frame_total = len(reader)
    # the video reader will handle seeking and skipping in the most efficient manner
    decoded = [reader[pos] for pos in range(frame_total)]
    print("decord", len(decoded))

Example #18

0

Show file

File: mutil_process_readvideo.py Project: haok61bkhn/Process_Video_yolov5

 def __init__(self, url, num_threads=1, batch=64):
     """Open ``url`` with decord and record the batch settings.

     The ``num_threads`` argument is not used: ``self.num_threads`` is always
     set to ``multiprocessing.cpu_count()``.
     """
     detected_cpus = multiprocessing.cpu_count()
     self.num_threads = detected_cpus
     print("cpu count ", detected_cpus)
     reader = VideoReader(url, ctx=cpu(0))
     self.vr = reader
     self.img_size = 640
     # self.detection = YOLOV5()
     self.n = len(reader)
     self.batch = batch

Example #19

0

Show file

File: test_benchmark_decord.py Project: alesanfra/iterframes

def test_same_behavior_as_decord(video_path):
    """Check that the first frame from ``read`` matches decord's first frame."""
    from decord import VideoReader

    first_frame = next(read(video_path))
    reference = VideoReader(video_path).next().asnumpy()

    assert first_frame.shape == reference.shape
    np.testing.assert_equal(first_frame, reference)

Example #20

0

Show file

File: test_decord.py Project: ChaokunChang/SVAS

class DecordVideoReader():
    """Wrapper around ``decord.VideoReader`` with a frame offset.

    Frames are requested at a fixed output size.  With ``is_torch`` set,
    items are returned as float tensors divided by 255 with the last axis
    moved in front of the two spatial axes; otherwise as NumPy arrays.
    """

    def __init__(self,
                 video_file,
                 img_size=(416, 416),
                 gpu=None,
                 num_threads=8,
                 offset=0,
                 is_torch=True):
        """Open ``video_file``.

        Args:
            video_file: Video source handed to ``VideoReader``.
            img_size: ``(width, height)`` pair, or one value used for both.
            gpu: GPU id for ``decord.gpu``; ``None`` decodes on the CPU.
            num_threads: Forwarded to ``VideoReader``.
            offset: Number of leading frames hidden from indexing.
            is_torch: When true, switch decord's bridge to ``'torch'``.
        """
        self.is_torch = is_torch
        if is_torch:
            decord.bridge.set_bridge('torch')
        # Normalise the size once so the stored attribute and the reader
        # agree; isinstance also accepts lists and tuple subclasses.
        if not isinstance(img_size, (tuple, list)):
            img_size = (img_size, img_size)
        self.img_size = tuple(img_size)
        self.offset = offset
        if gpu is None:
            ctx = decord.cpu()
        else:
            ctx = decord.gpu(gpu)
        self._vr = VideoReader(video_file,
                               ctx=ctx,
                               width=self.img_size[0],
                               height=self.img_size[1],
                               num_threads=num_threads)

    def __len__(self):
        """Return the number of frames after the offset."""
        return len(self._vr) - self.offset

    def __getitem__(self, idx):
        """Return frame ``idx`` counted from the offset."""
        frame = self._vr[idx + self.offset]
        if self.is_torch:
            return frame.permute(2, 0, 1).contiguous().float().div(255)
        return frame.asnumpy()

    def get_batch(self, batch):
        """Return the frames at the given offset-relative indices as one batch."""
        shifted = [b + self.offset for b in batch]
        frames = self._vr.get_batch(shifted)
        if self.is_torch:
            return frames.permute(0, 3, 1, 2).contiguous().float().div(255)
        return frames.asnumpy()

Example #21

0

Show file

def test_bytes_io():
    """Decode from an open binary file object and compare frame 10 with the default test video."""
    here = os.path.dirname(__file__)
    fn = os.path.abspath(
        os.path.join(here, '..', '..', '..', 'examples',
                     'flipping_a_pancake.mkv'))
    with open(fn, 'rb') as f:
        vr = VideoReader(f)
        assert len(vr) == 310
        vr2 = _get_default_test_video()
        from_stream = vr[10].asnumpy()
        reference = vr2[10].asnumpy()
        assert np.allclose(from_stream, reference)

Example #22

0

Show file

File: decord_speed.py Project: innerlee/cvbenchmark

def run():
    """Time decord reader construction and a few frame reads for each file.

    For every path in ``files`` the bytes are copied to ``/dev/shm/a.mp4``
    and a reader is opened on that copy.  Then a reader on the original path
    is timed ("init"), followed by six frame reads ("decode").  Durations are
    printed in whole microseconds.
    """
    one_us = datetime.timedelta(microseconds=1)
    for f in files:
        with open(f, "rb") as src:
            bs = src.read()
        with open('/dev/shm/a.mp4', 'wb') as bf:
            bf.write(bs)

        # NOTE(review): this reader on the shared-memory copy is replaced
        # below without being used - presumably a warm-up; confirm intent.
        vr = VideoReader('/dev/shm/a.mp4', ctx=cpu(0), num_threads=0)

        a = datetime.datetime.now()
        vr = VideoReader(f, ctx=cpu(0), num_threads=0)
        b = datetime.datetime.now()
        # timedelta.microseconds is only the sub-second part; dividing by one
        # microsecond gives the full elapsed time.
        print("init: ", (b - a) // one_us, "us")
        for i in np.array([10, 12, 14, 60, 62, 64]) + 0:
            vr[i]
        c = datetime.datetime.now()
        print(f, "decode: ", (c - b) // one_us, "us")

Example #23

0

Show file

    def __getitem__(self, idx):
        """
        Sample ``tuple_len`` frames spaced ``interval`` apart, shuffle them
        and return the frames with their original positions.

        In test mode the global ``random`` generator is re-seeded with ``idx``
        before the start position is drawn and again before the shuffle, so
        the same item is produced each time.

        Returns:
            tuple_frame (tensor): [tuple_len x channel x height x width]
            tuple_order (tensor): [tuple_len]
        """
        split = self.train_split if self.train else self.test_split
        videoname = split[idx]
        filename = os.path.join(self.root_dir, 'video', videoname)
        videodata = VideoReader(filename, ctx=cpu(0))
        length = len(videodata)

        tuple_order = list(range(0, self.tuple_len))

        # random select frame for train, deterministic random select for test
        if not self.train:
            random.seed(idx)
        tuple_start = random.randint(0, length - self.tuple_total_frames)

        tuple_frame = [
            videodata[tuple_start + k * self.interval]
            for k in range(self.tuple_len)
        ]

        frame_and_order = list(zip(tuple_frame, tuple_order))
        # random shuffle for train, the same shuffle for test
        if not self.train:
            random.seed(idx)
        random.shuffle(frame_and_order)
        tuple_frame, tuple_order = zip(*frame_and_order)

        if self.transforms_:
            trans_tuple = []
            for frame in tuple_frame:
                frame = self.toPIL(frame)  # PIL image
                frame = self.transforms_(frame)  # tensor [C x H x W]
                trans_tuple.append(frame)
            tuple_frame = trans_tuple
        else:
            tuple_frame = [torch.tensor(frame) for frame in tuple_frame]

        return torch.stack(tuple_frame), torch.tensor(tuple_order)

Example #24

0

Show file

File: test_benchmark_decord.py Project: alesanfra/iterframes

def test_same_behavior_as_decord_with_resize(video_path):
    """Check that the first resized frame from ``read`` matches decord's."""
    from decord import VideoReader

    height = 540
    width = 960

    first_frame = next(read(video_path, height=height, width=width))
    decord_reader = VideoReader(video_path, width=width, height=height)
    reference = decord_reader.next().asnumpy()

    assert first_frame.shape == reference.shape
    np.testing.assert_equal(first_frame, reference)

Example #25

0

Show file

    def _check_video(video_filename):
        """Return True when decord can open the file and it has at least one frame."""
        try:
            container = VideoReader(video_filename, num_threads=1)
            ok = len(container) > 0
            del container
        except Exception:
            # Best-effort check: any decoder failure means "not ok", while
            # KeyboardInterrupt/SystemExit still propagate.
            ok = False

        return ok

Example #26

0

Show file

File: extract_frames.py Project: Ivanka07/poor_neck_posture_dataset

def extract_frames_from_video(video_file, video_id, target_dir):
    """Sample about one frame per second and save those scoring above 2.

    Sampled frames whose ``<target_dir>/<index:010d>.jpg`` does not yet exist
    are run through object detection, captioning and scoring.  When the score
    exceeds 2 the frame is written to disk and its fields are added to the
    result.

    csv item: video_id;path_to_frame;frame_index;avg_fps;yolo3_classes;caption;score;

    Returns:
        list: Flat list with the seven fields above for each saved frame,
        one frame after another.
    """
    results = []

    # a file like object works as well, for in-memory decoding
    with open(video_file, 'rb') as f:
        vr = VideoReader(f, ctx=cpu(0))
        print('video frames:', len(vr))
        fps = vr.get_avg_fps()
        avg_fps = int(fps)
        print('get_avg_fps=', fps)
        # int() truncates to 0 below 1 FPS, and range() rejects a zero step.
        step = max(1, avg_fps)
        for i in range(0, len(vr), step):
            save_path = os.path.join(target_dir, "{:010d}.jpg".format(i))
            if os.path.exists(save_path):
                continue
            # Decode only frames that still need processing.
            frame = vr[i]
            print(frame.shape)
            img = frame.asnumpy()
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            detected_classes_list = detect_objects_single_image(img)
            words = get_caption_single_image(img)
            score = get_score(words, detected_classes_list)
            if score > 2:
                frame_result = [
                    video_id,
                    save_path,
                    i,
                    avg_fps,
                    detected_classes_list,
                    words,
                    score,
                ]
                cv2.imwrite(save_path, img)
                # NOTE(review): extend() flattens the fields into one list
                # rather than appending one row per frame; kept for callers.
                results.extend(frame_result)
    return results

Example #27

0

Show file

    def __init__(self, video_file, frame_idxs=None):
        """
        Open the video and fix the frame indices to process.

        :param video_file: video file path
        :param frame_idxs: frames to process, a list of integers; all frames
            when omitted. A given list is stored in sorted order.
        """
        self.vr = VideoReader(video_file, ctx=cpu(0))
        self._rotation = check_rotation(video_file)
        self._frame_idxs = (
            sorted(frame_idxs) if frame_idxs is not None
            else np.arange(len(self.vr))
        )

Example #28

0

Show file

File: detect_and_crop_on_videos.py Project: ternaus/Pytorch_Retinaface

def get_frames(video_path: Path, num_frames: int, resize_coeff: Tuple[int,
                                                                      int],
               transform: albu.Compose, decode_gpu: bool) -> Dict[str, Any]:
    """Decode evenly spaced frames from a video and prepare them.

    With ``num_frames`` set to ``None`` every frame is used; otherwise at most
    ``num_frames`` frames are taken at a fixed step.

    Returns:
        Dict with ``torched_frames``, ``resize_factor``, ``video_path``,
        ``frame_ids`` and ``frames``, or an empty dict when decord raises
        ``DECORDError``.
    """
    try:
        device = gpu(0) if decode_gpu else cpu(0)
        video = VideoReader(str(video_path), ctx=device)
        len_video = len(video)

        if num_frames is None:
            frame_ids = list(range(len_video))
        else:
            # Step 1 for short videos, otherwise spread over the whole video.
            step = 1 if len_video < num_frames else int(len_video / num_frames)
            frame_ids = list(range(0, len_video, step))[:num_frames]

        frames = video.get_batch(frame_ids).asnumpy()
        torched_frames, resize_factor = prepare_frames(frames, resize_coeff,
                                                       transform)
        return {
            "torched_frames": torched_frames,
            "resize_factor": resize_factor,
            "video_path": video_path,
            "frame_ids": np.array(frame_ids),
            "frames": frames,
        }
    except DECORDError:
        print(f"{video_path} is broken")
        return {}

Example #29

0

Show file

File: download_videos.py Project: openvinotoolkit/mmaction2

    def _check_video(video_filename):
        """Validate a video with decord and delete it when it is unusable.

        Returns:
            tuple: ``(video_filename, True, "Checked")`` when decord opens the
            file and reports at least one frame; otherwise the file is removed
            and ``(video_filename, False, "Invalid video file")`` is returned.
        """
        try:
            container = VideoReader(video_filename, num_threads=1)
            ok = len(container) > 0
            del container
        except Exception:
            # Best-effort check: any decoder failure means "invalid", while
            # KeyboardInterrupt/SystemExit still propagate.
            ok = False

        if ok:
            return video_filename, True, "Checked"

        remove(video_filename)
        return video_filename, False, "Invalid video file"

Example #30

0

Show file

File: hmdb51.py Project: YangLiu9208/TCGL

    def __getitem__(self, idx):
        """
        Cut ``sample_num`` clips of ``clip_len`` frames at evenly spaced
        positions along the video.

        Returns:
            clip (tensor): [channel x time x height x width]
            class_idx (tensor): class index [0-50]
        """
        split = self.train_split if self.train else self.test_split
        videoname = split[idx]
        # The class label is the part of the name before the first '/'.
        class_idx = self.class_label2idx[videoname[:videoname.find('/')]] - 1
        filename = os.path.join(self.root_dir, 'video', videoname)

        videodata = VideoReader(filename, ctx=cpu(0))
        length = len(videodata)

        all_clips = []
        all_idx = []
        for i in np.linspace(self.clip_len / 2, length - self.clip_len / 2,
                             self.sample_num):
            clip_start = int(i - self.clip_len / 2)
            clip = videodata[clip_start:clip_start + self.clip_len]
            if self.transforms_:
                trans_clip = []
                # fix seed, apply the same `random transformation` for all frames in the clip
                seed = random.random()
                for frame in clip.asnumpy():
                    random.seed(seed)
                    frame = self.toPIL(frame)  # PIL image
                    frame = self.transforms_(frame)  # tensor [C x H x W]
                    trans_clip.append(frame)
                # (T x C X H x W) to (C X T x H x W)
                clip = torch.stack(trans_clip).permute([1, 0, 2, 3])
                # Subtract the mean over dim 1 from every frame.
                clip_mean = torch.mean(clip, 1, keepdim=True)
                clip = clip - clip_mean
            else:
                # NOTE(review): passes the decord slice straight to
                # torch.tensor - confirm this works with the active bridge.
                clip = torch.tensor(clip)
            all_clips.append(clip)
            all_idx.append(torch.tensor(int(class_idx)))

        return torch.stack(all_clips), torch.stack(all_idx)