Code example #1
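Note: the snippets below are excerpts and omit their import statements. A minimal preamble most of them assume (the aliases are the usual conventions, not taken from the original projects):

import os

import cv2
import numpy as np
import torch

import decord
from decord import VideoReader, VideoLoader, cpu, gpu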
    def loadvideo_decord(self, sample, sample_rate_scale=1):
        """Load video content using Decord"""
        # pylint: disable=line-too-long, bare-except, unnecessary-comprehension
        fname = self.data_path + sample

        if not os.path.exists(fname):
            return []

        # avoid hanging issue
        if os.path.getsize(fname) < 1 * 1024:
            print('SKIP: ', fname, " - ", os.path.getsize(fname))
            return []
        try:
            if self.keep_aspect_ratio:
                vr = VideoReader(fname, num_threads=1, ctx=cpu(0))
            else:
                vr = VideoReader(fname,
                                 width=self.new_width,
                                 height=self.new_height,
                                 num_threads=1,
                                 ctx=cpu(0))
        except:
            print("video cannot be loaded by decord: ", fname)
            return []

        if self.mode == 'test':
            all_index = [x for x in range(0, len(vr), self.frame_sample_rate)]
            while len(all_index) < self.clip_len:
                all_index.append(all_index[-1])
            vr.seek(0)
            buffer = vr.get_batch(all_index).asnumpy()
            return buffer

        # handle temporal segments
        converted_len = int(self.clip_len * self.frame_sample_rate)
        seg_len = len(vr) // self.num_segment

        all_index = []
        for i in range(self.num_segment):
            if seg_len <= converted_len:
                index = np.linspace(0,
                                    seg_len,
                                    num=seg_len // self.frame_sample_rate)
                index = np.concatenate(
                    (index,
                     np.ones(self.clip_len - seg_len // self.frame_sample_rate)
                     * seg_len))
                index = np.clip(index, 0, seg_len - 1).astype(np.int64)
            else:
                end_idx = np.random.randint(converted_len, seg_len)
                str_idx = end_idx - converted_len
                index = np.linspace(str_idx, end_idx, num=self.clip_len)
                index = np.clip(index, str_idx, end_idx - 1).astype(np.int64)
            index = index + i * seg_len
            all_index.extend(list(index))

        all_index = all_index[::int(sample_rate_scale)]
        vr.seek(0)
        buffer = vr.get_batch(all_index).asnumpy()
        return buffer
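For reference, get_batch returns a decord NDArray and .asnumpy() converts it to a (T, H, W, C) uint8 array; a minimal check (the file name is a placeholder):

vr = VideoReader('video.mp4', ctx=cpu(0))
buf = vr.get_batch([0, 2, 4]).asnumpy()
print(buf.shape, buf.dtype)  # e.g. (3, 240, 320, 3) uint8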
Code example #2
def get_train_clip(opts, video_path):
    """
        Chooses a random clip from a video for training/ validation
        Args:
            opts         : config options
            frame_path  : frames of video frames
            Total_frames: Number of frames in the video
        Returns:
            list(frames) : random clip (list of frames of length sample_duration) from a video for training/ validation
        """
    clip = []
    i = 0
    loop = False

    vr = VideoReader(video_path, width=-1, height=-1)
    # h, w = vr[0].shape[:2]
    # if h > w:
    #     r_w = 256
    #     r_h = int(h/w*256)
    # else:
    #     r_h = 256
    #     r_w = int(w/h*256)
    # vr = VideoReader(video_path, width=r_w, height=r_h)

    total_frames = len(vr)

    if total_frames > 300:
        interval = int(total_frames / (300 / opts.sample_duration))
        s_frame = np.random.randint(0, total_frames - interval)
        f_stamp = list(np.linspace(s_frame, s_frame + interval,
                                   opts.sample_duration).astype(np.int64))
        clip = vr.get_batch(f_stamp).asnumpy()
        return torch.from_numpy(clip.transpose(3, 0, 1, 2).astype(np.float32))

    else:
        # choosing a random frame
        if total_frames <= opts.sample_duration: 
            loop = True
            start_frame = 0
        else:
            start_frame = np.random.randint(0, total_frames - opts.sample_duration)
        

        if opts.modality == 'RGB': 
            while len(clip) < opts.sample_duration:
                clip.append(vr.get_batch([start_frame+i]).asnumpy()[0]) # revised
                i += 1
                
                if loop and i == total_frames:
                    i = 0
        
        return torch.from_numpy(np.array(clip, dtype=np.float32).transpose(3, 0, 1, 2))
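The final transpose(3, 0, 1, 2) converts decord's (T, H, W, C) frame batch into the (C, T, H, W) layout 3D CNNs expect; a quick shape check with dummy data:

clip = np.zeros((16, 240, 320, 3), dtype=np.float32)  # (T, H, W, C) as returned by decord
chw = clip.transpose(3, 0, 1, 2)                      # (C, T, H, W) for 3D CNNs
assert chw.shape == (3, 16, 240, 320)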
Code example #3
    def worker_func(idx, data_queue, msg_queue, anno_lst):
        while True:
            msg = msg_queue.get()

            if msg == 'stop':
                break

            elif msg == 'new_epoch':
                for anno in anno_lst:
                    if Enable_Time_Log:
                        t1 = time.time()
                    anno_copy = {k: v for k, v in anno.items()}
                    vr = VideoReader(anno['Video'], ctx=cpu(idx))
                    h, w, _ = Cfg.input_frame_shape

                    anno_copy['Frames'] = [
                        pickle.dumps(cv2.resize(img[:, :, ::-1], (w, h))) \
                        for img in \
                        list(vr.get_batch(anno['FrameIDs']).asnumpy())]
                    data_queue.put(anno_copy)
                    if Enable_Time_Log:
                        t2 = time.time()
                        print('Decord reader takes {:.3f}s'.format(t2 - t1))

            elif len(msg) == 2 and msg[0] == 'update':
                anno_lst = msg[1]
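A hypothetical harness for the worker above (the queue and annotation setup are assumed, not from the original source):

import multiprocessing as mp

data_q, msg_q = mp.Queue(), mp.Queue()
worker = mp.Process(target=worker_func, args=(0, data_q, msg_q, annotations))
worker.start()
msg_q.put('new_epoch')   # triggers one decode pass over the annotations
first = data_q.get()     # blocks until the worker has decoded one video
msg_q.put('stop')
worker.join()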
Code example #4
def test_video(video_name):
    """loads the given video and feeds frames through the inference engine"""
    f = os.path.join(cachedir,
                     os.path.basename(os.path.splitext(video_name)[0]))
    if os.path.isfile(f + ".npy"):
        print(f"FOUND EXISTING CLASSIFICATIONS: {f}.npy")
        return np.load(f + ".npy")

    vr = VideoReader(video_name, ctx=cpu(0))

    frames = len(vr)
    print("video frames:", frames)
    decord.bridge.set_bridge('tensorflow')

    # Assuming 60 fps
    sample_rate = 60
    images_per_batch = 32
    samples = int(frames / sample_rate)
    batches = int(samples / images_per_batch)

    persample = np.empty((batches * images_per_batch, 4), dtype=np.uint32)

    for i in range(batches):
        print("batch", i, "of", batches)
        # Create a collection of frame indexes at each sample rate within the batch
        frameIdxs = [(x * sample_rate) + (i * images_per_batch * sample_rate)
                     for x in range(images_per_batch)]
        batch_frames = vr.get_batch(frameIdxs)

        res = inferLocal(frameIdxs, batch_frames)
        persample[i * images_per_batch:(i + 1) * images_per_batch, :] = res

    print("saving to", f)
    np.save(f, persample)
    return persample
Code example #5
def extract_frames(video, hi_dir, hi_size, times):
    info = get_video_info(video)
    w, h = info['coded_width'], info['coded_height']

    aspect_ratio = w / h
    if aspect_ratio > hi_size[0] / hi_size[1]:
        # Wide format
        wo, ho = hi_size[0], int(hi_size[0] // aspect_ratio)
    else:
        wo, ho = int(hi_size[1] * aspect_ratio), hi_size[1]

    framerate = int(info['nb_frames']) / float(info['duration'])

    nframes = []
    for time in times:
        nframes.append(int(framerate * (2 * (time + 1))))

    vr = VideoReader(video, ctx=cpu(0))
    nframes = [min(len(vr) - 1, x) for x in nframes]
    frames = vr.get_batch(nframes).asnumpy()

    for i in range(len(nframes)):
        frame = frames[i, :, :, :]
        # decord returns RGB; reorder to BGR for cv2.imwrite below.
        frame = frame[:, :, np.array([2, 1, 0])]
        assert frame.ndim == 3
        assert frame.shape[-1] == 3

        cv2.imwrite(os.path.join(hi_dir, f'thumb-{times[i]+1:04}.png'),
                    cv2.resize(frame, (wo, ho)))
Code example #6
File: test_decord.py  Project: ChaokunChang/SVAS
class DecordVideoReader():
    def __init__(self,
                 video_file,
                 img_size=(416, 416),
                 gpu=None,
                 num_threads=8,
                 offset=0,
                 is_torch=True):
        self.is_torch = is_torch
        if is_torch:
            decord.bridge.set_bridge('torch')
        if isinstance(img_size, tuple):
            self.img_size = img_size
        else:
            self.img_size = (img_size, img_size)
        self.offset = offset
        if gpu is None:
            ctx = decord.cpu()
        else:
            ctx = decord.gpu(gpu)
        self._vr = VideoReader(video_file,
                               ctx=ctx,
                               width=self.img_size[0],
                               height=self.img_size[1],
                               num_threads=num_threads)

    def __len__(self):
        return len(self._vr) - self.offset

    def __getitem__(self, idx):
        if self.is_torch:
            return self._vr[idx + self.offset].permute(
                2, 0, 1).contiguous().float().div(255)
        else:
            return self._vr[idx + self.offset].asnumpy()

    def get_batch(self, batch):
        batch = [b + self.offset for b in batch]
        if self.is_torch:
            return self._vr.get_batch(batch).permute(
                0, 3, 1, 2).contiguous().float().div(255)
        else:
            return self._vr.get_batch(batch).asnumpy()
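Hypothetical usage of the class above (the file name is assumed):

reader = DecordVideoReader('clip.mp4', img_size=416, gpu=None, is_torch=True)
print(len(reader))                    # number of readable frames
batch = reader.get_batch([0, 8, 16])  # float tensor, (N, C, H, W), scaled to [0, 1]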
Code example #7
def main():
    files = list(BASE.glob('**/*.mp4'))
    frames = list(range(16))

    # Warm up
    for f in files[:16]:
        v = VideoReader(str(f))
        v.get_batch(frames)

    print(f"Reading {len(frames)} frames from {len(files)} files")

    for i in range(3):
        print(f'pass {i + 1}')
        t1 = time.perf_counter()
        for f in files:
            v = VideoReader(str(f))
            v.get_batch(frames)
        t2 = time.perf_counter()
        print(f'Time: {t2 - t1:.3f}s')

    print('Done')
Code example #8
def get_frames(video_path: Path, num_frames: int, resize_coeff: Tuple[int, int],
               transform: albu.Compose, decode_gpu: bool) -> Dict[str, Any]:
    try:
        if decode_gpu:
            video = VideoReader(str(video_path), ctx=gpu(0))
        else:
            video = VideoReader(str(video_path), ctx=cpu(0))

        len_video = len(video)

        if num_frames is None:
            frame_ids = list(range(len_video))
        else:
            if len_video < num_frames:
                step = 1
            else:
                step = int(len_video / num_frames)

            frame_ids = list(range(0, len_video, step))[:num_frames]

        frames = video.get_batch(frame_ids).asnumpy()

        torched_frames, resize_factor = prepare_frames(frames, resize_coeff,
                                                       transform)

        result = {
            "torched_frames": torched_frames,
            "resize_factor": resize_factor,
            "video_path": video_path,
            "frame_ids": np.array(frame_ids),
            "frames": frames,
        }
    except DECORDError:
        print(f"{video_path} is broken")
        result = {}

    return result
Code example #9
def extract_frames(video_path,
                   frames_dir,
                   overwrite=False,
                   start=-1,
                   end=-1,
                   every=1):
    """
    Extract frames from a video using decord's VideoReader
    :param video_path: path of the video
    :param frames_dir: the directory to save the frames
    :param overwrite: to overwrite frames that already exist?
    :param start: start frame
    :param end: end frame
    :param every: frame spacing
    :return: count of images saved
    """

    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible

    video_dir, video_filename = os.path.split(video_path)  # get the video path and filename from the path

    assert os.path.exists(video_path)  # assert the video file exists

    # load the VideoReader
    vr = VideoReader(video_path,
                     ctx=cpu(0))  # can set to cpu or gpu .. ctx=gpu(0)

    if start < 0:  # if start isn't specified lets assume 0
        start = 0
    if end < 0:  # if end isn't specified assume the end of the video
        end = len(vr)

    frames_list = list(range(start, end, every))
    saved_count = 0

    save_dir = os.path.join(frames_dir, video_filename)
    os.makedirs(save_dir, exist_ok=True)  # cv2.imwrite fails silently if the directory is missing

    if every > 25 and len(frames_list) < 1000:  # faster for every > 25 when the whole batch fits in memory
        frames = vr.get_batch(frames_list).asnumpy()

        for index, frame in zip(frames_list, frames):  # loop through the sampled frames
            save_path = os.path.join(save_dir, "{:010d}.jpg".format(index))  # create the save path
            if not os.path.exists(save_path) or overwrite:  # skip existing files unless overwriting
                cv2.imwrite(save_path,
                            cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # save the extracted image
                saved_count += 1  # increment our counter by one

    else:  # faster for every < 25 and uses little memory
        for index in range(start, end):  # loop through the frames until the end
            frame = vr[index]  # read a single frame

            if index % every == 0:  # only write out every 'every'-th frame
                save_path = os.path.join(save_dir, "{:010d}.jpg".format(index))  # create the save path
                if not os.path.exists(save_path) or overwrite:  # skip existing files unless overwriting
                    cv2.imwrite(save_path,
                                cv2.cvtColor(frame.asnumpy(), cv2.COLOR_RGB2BGR))  # save the extracted image
                    saved_count += 1  # increment our counter by one

    return saved_count  # and return the count of the images we saved
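A hypothetical invocation of the extractor above (paths assumed):

saved = extract_frames('input.mp4', 'frames', overwrite=False, every=30)
print('saved', saved, 'frames')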
Code example #10
def get_test_clip(opts, video_path):
    """
        Args:
            opts         : config options
            frame_path  : frames of video frames
            Total_frames: Number of frames in the video
        Returns:
            list(frames) : list of all video frames
        """

    clip = []
    clip_stamps = []
    i = 0

    try:
        vr = VideoReader(video_path, width=-1, height=-1)
    except:
        print('video path {} cannot be opened'.format(video_path))
        with open('un_opened_file.txt', 'a') as f:
            f.write(video_path)
            f.write('\n')
        return None  # vr is undefined past this point, so skip this video
    
    # h, w = vr[0].shape[:2]
    # if h > w:
    #     r_w = 256
    #     r_h = int(h/w*256)
    # else:
    #     r_h = 256
    #     r_w = int(w/h*256)
    # vr = VideoReader(video_path, width=r_w, height=r_h)


    total_frames = len(vr)
    # in case video FPS >> 30 
    if total_frames > 300:
        s_stamp = np.linspace(0, total_frames, int(300 / 16) + 1)
        s_stamp = s_stamp.astype(np.int64)
        for i in range(len(s_stamp[:-1])):
            i_batch = list(np.linspace(s_stamp[i], s_stamp[i + 1] - 1, 16).astype(np.int64))
            clip_stamps.append(i_batch)
    else:
        if total_frames < opts.sample_duration: 
            single_clip_stamp = list(range(0, total_frames))
            while len(single_clip_stamp) < opts.sample_duration:
                single_clip_stamp.append(i)
                i += 1
                if i >= total_frames-1:
                    i = 0
            clip_stamps.append(single_clip_stamp)
        else:
            s_stamp = list(range(0, total_frames, opts.sample_duration))[:-1]
            s_stamp.append(total_frames - opts.sample_duration)
            for f_start in s_stamp:
                clip_stamps.append(list(range(f_start, f_start+opts.sample_duration)))
        
    if opts.modality == 'RGB': 
        for stamps in clip_stamps:
            # batch = vr.get_batch(stamps).asnumpy()
            # show_img_numpy(batch[0])
            clip.append(vr.get_batch(stamps).asnumpy())

    return torch.from_numpy(np.array(clip, dtype=np.float32).transpose(0, 4, 1, 2, 3))
Code example #11
    def __getitem__(self, idx):
        """
        Returns:
            tuple_clip (tensor): [tuple_len x channel x time x height x width]
            tuple_order (tensor): [tuple_len]
        """
        if self.train:
            videoname = self.train_split[idx].split()[0]
        else:
            videoname = self.test_split[idx].split()[0]

        filename = os.path.join(self.root_dir, 'video',
                                videoname)  #.replace('\\', '/')
        #videodata = skvideo.io.vread(filename)
        videodata = VideoReader(filename, ctx=cpu(0))
        #length, height, width, channel = videodata.shape
        length = len(videodata)
        height = videodata[0].shape[0]
        width = videodata[0].shape[1]
        channel = videodata[0].shape[2]

        tuple_clip = []
        tuple_order = list(range(0, self.tuple_len))

        # random select tuple for train, deterministic random select for test
        if self.train:
            tuple_start = random.randint(0,
                                         abs(length - self.tuple_total_frames))
        else:
            random.seed(idx)
            tuple_start = random.randint(0,
                                         abs(length - self.tuple_total_frames))

        clip_start = tuple_start
        for _ in range(self.tuple_len):
            clip = videodata.get_batch(
                list(range(clip_start, clip_start + 3 * self.clip_len, 3)))
            tuple_clip.append(clip)
            clip_start = clip_start + 3 * self.clip_len + self.interval

        clip_and_order = list(zip(tuple_clip, tuple_order))
        # random shuffle for train, the same shuffle for test
        if self.train:
            random.shuffle(clip_and_order)
        else:
            random.seed(idx)
            random.shuffle(clip_and_order)
        tuple_clip, tuple_order = zip(*clip_and_order)

        if self.transforms_:
            trans_tuple = []
            for clip in tuple_clip:
                trans_clip = []
                # fix seed, apply the same random transformation to all frames in the clip
                seed = random.random()
                for frame in clip.asnumpy():
                    random.seed(seed)
                    frame = self.toPIL(frame)  # PIL image
                    frame = self.transforms_(frame)  # tensor [C x H x W]
                    trans_clip.append(frame)
                # (T x C X H x W) to (C X T x H x W)
                trans_clip = torch.stack(trans_clip).permute([1, 0, 2, 3])
                trans_tuple.append(trans_clip)
            tuple_clip = trans_tuple
        else:
            tuple_clip = [torch.tensor(clip.asnumpy()) for clip in tuple_clip]

        return torch.stack(tuple_clip), torch.tensor(tuple_order), idx
Code example #12
# VideoLoader batches frames from several files at once (its constructor was
# truncated in the original excerpt; the arguments below are illustrative)
vl = VideoLoader([path], ctx=[cpu(0)], shape=(2, 320, 240, 3), interval=1,
                 skip=5, shuffle=1)
ex = vl.next()
vr = VideoReader(path, ctx=cpu(0))
# a file like object works as well, for in-memory decoding
with open(path, 'rb') as f:
    vr = VideoReader(f, ctx=cpu(0))
print('video frames:', len(vr))
# 1. the simplest way is to directly access frames
for i in range(len(vr)):
    # the video reader will handle seeking and skipping in the most efficient manner
    frame = vr[i]
    print(frame.shape)

# To get multiple frames at once, use get_batch
# this is the efficient way to obtain a long list of frames
frames = vr.get_batch([1, 3, 5, 7, 9])
print(frames.shape)
# (5, 240, 320, 3)
# duplicate frame indices will be accepted and handled internally to avoid duplicate decoding
frames2 = vr.get_batch([1, 2, 3, 2, 3, 4, 3, 4, 5])
print(frames2.shape)
# (9, 240, 320, 3)

# 2. you can do cv2 style reading as well
# skip 100 frames
vr.skip_frames(100)
# seek to start
vr.seek(0)
batch = vr.next()
print('frame shape:', batch.shape)
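Decord can also hand decoded frames straight to a framework via its bridge, as code example #6 above does for PyTorch; a small sketch:

decord.bridge.set_bridge('torch')   # accepts 'native', 'torch', 'tensorflow', 'mxnet'
frames = vr.get_batch([0, 1, 2])    # now a torch.Tensor instead of a decord NDArray
decord.bridge.set_bridge('native')  # switch back to decord NDArrays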
Code example #13
class Generator:
    def __init__(self, dataset_path):
        self.counter = 0
        self.dataset = dataset_path
        self.models = os.listdir('checkpoints')
        self.dream_len = 0
        self.frame_rate = 0
        self.vr = None

    def set_dream_len(self, len_in_seconds=60, frame_rate=30):
        self.dream_len = len_in_seconds * frame_rate
        self.frame_rate = frame_rate
        self.counter = 0

    def is_dream_terminated(self):
        if self.counter < self.dream_len:
            return False
        else:
            return True

    def set_new_video(self, path):
        self.vr = VideoReader(path, width=320, height=240, ctx=cpu(0))

    def get_total_frame_count(self):
        return len(self.vr)

    def get_a_frame(self, frame):
        if frame < self.get_total_frame_count():
            return self.vr[frame].asnumpy() - 1
        else:
            print('frame is out of range')

    def get_frames(self, skipping=1):
        return self.vr.get_batch(range(0, len(self.vr) - 1, skipping))

    def process(self, model_name):
        # make a new directory for storing temp data
        if os.path.exists('data/raw'):
            shutil.rmtree('data/raw')
        os.mkdir('data/raw')

        # check to make sure the model is available
        if model_name not in self.models:
            print('specified model not available')
            return
        else:
            frame_count = 0
            total_frames = self.get_total_frame_count()
            # decompose a video into frames
            while not self.is_dream_terminated():
                if frame_count < total_frames:
                    img = Image.fromarray(self.get_a_frame(frame_count))
                    img.save('data/raw/' +
                             '{num:05d}'.format(num=self.counter) + '.png')
                    frame_count += 1
                    self.counter += 1
                else:
                    break

            # start translating the images
            transform(model_name, total_frames)
            # copy the results to the translated folder
            path = os.path.join('data/tmp', model_name, 'test_latest/images')
            files = [img for img in os.listdir(path) if img.endswith(".png")]
            for file in files:
                if 'fake' in file:
                    # copy the translated images to the translated folder
                    shutil.copy(os.path.join(path, file), 'data/translated')

    def generate_video(self, path):
        image_files = [
            path + '/' + img for img in os.listdir(path)
            if img.endswith(".png")
        ]
        image_files.sort()
        clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(
            image_files, fps=self.frame_rate)
        clip.write_videofile('results/dream.mp4')
Code example #14
def get_decord_batch(path):
    vr = VideoReader(path, ctx=cpu(0))
    len_vr = len(vr)
    batch = vr.get_batch(range(len_vr))
    print("decord", batch.shape)
Code example #15
            indices = [0] + indices + [len(scene)]
            breakpoints = [(indices[i], indices[i + 1])
                           for i in range(0,
                                          len(indices) - 1)]
            breakpoints = [
                _breakpoint for _breakpoint in breakpoints if _breakpoint[0] +
                1 != _breakpoint[1] and _breakpoint[0] != _breakpoint[1]
            ]

            for itr, (scene_start, scene_end) in enumerate(breakpoints):
                # validate the scene
                if scene_end - scene_start > 20:
                    # convert this to iterations
                    frames = scene.get_batch(range(scene_start,
                                                   scene_end)).asnumpy()
                    start_time, end_time = (
                        frame_count_to_timestamp(scene_start, fps),
                        frame_count_to_timestamp(scene_end, fps),
                    )
                    # extract the clip and save it in the final folder
                    if not os.path.exists(CURRENT_DOWNLOAD_PATH):
                        os.makedirs(CURRENT_DOWNLOAD_PATH, exist_ok=True)

                    clip = VideoFileClip(scene_path).subclip(
                        str(start_time), str(end_time))

                    clip.write_videofile(
                        os.path.join(
                            CURRENT_DOWNLOAD_PATH,
                            f"start_time-{scene_start}-start_end-{scene_end}-"
Code example #16
duration = len(vr)
print('The video contains %d frames' % duration)

################################################################
# If we want to access the frame at index 9 (the 10th frame),

frame = vr[9]
print(frame.shape)

################################################################
# For deep learning, we usually want multiple frames at once; use the ``get_batch`` function.
# Suppose we want a 32-frame clip, skipping every other frame:

frame_id_list = range(0, 64, 2)
frames = vr.get_batch(frame_id_list).asnumpy()
print(frames.shape)

################################################################
# Another advanced feature: you can get all the key frames, as below.
key_indices = vr.get_key_indices()
key_frames = vr.get_batch(key_indices)
print(key_frames.shape)

################################################################
# Pretty flexible, right? Try it on your videos.

Code example #17
    def loadvideo_decord(self, sample, sample_rate_scale=1):
        """Load video content using Decord"""
        # pylint: disable=line-too-long, bare-except, unnecessary-comprehension
        fname = self.data_path + sample

        if not os.path.exists(fname):
            return []

        # avoid hanging issue
        if os.path.getsize(fname) < 1 * 1024:
            print('SKIP: ', fname, " - ", os.path.getsize(fname))
            return []
        try:
            if self.keep_aspect_ratio:
                vr = VideoReader(fname, num_threads=1, ctx=cpu(0))
            else:
                vr = VideoReader(fname, width=self.new_width, height=self.new_height,
                                 num_threads=1, ctx=cpu(0))
        except:
            print("video cannot be loaded by decord: ", fname)
            return []

        if self.mode == 'test':
            all_index = [x for x in range(0, len(vr), self.frame_sample_rate)]
            while len(all_index) < self.clip_len:
                all_index.append(all_index[-1])
            vr.seek(0)
            buffer = vr.get_batch(all_index).asnumpy()
            return buffer
        # handle temporal segments
        converted_len = int(self.clip_len * self.frame_sample_rate)
        seg_len = len(vr) // self.num_segment

        all_index = []
        for i in range(self.num_segment):
            if seg_len <= converted_len:
                # segment shorter than one clip: take every
                # frame_sample_rate-th frame, then pad with unused indices
                # until clip_len frames are collected
                index = list(range(1, seg_len))[::self.frame_sample_rate]
                diff = self.clip_len - len(index)
                if diff > 0:
                    temp = int(seg_len / 2)
                    for j in range(diff):
                        while temp in index:
                            temp += 1
                        index.append(temp)
                        if temp >= seg_len:
                            temp = 0
                index.sort()
            else:
                # segment longer than one clip: take every
                # frame_sample_rate-th frame, then trim alternately from the
                # front and the back down to clip_len frames
                index = list(range(1, seg_len))[::self.frame_sample_rate]
                diff = len(index) - self.clip_len
                if diff > 0:
                    front = 0
                    back = seg_len - 1
                    start_front = True
                    for j in range(diff):
                        if start_front:
                            while front not in index:
                                front += 1
                            index.remove(front)
                            start_front = False
                        else:
                            while back not in index:
                                back -= 1
                            index.remove(back)
                            start_front = True
                index.sort()
            index = np.array(index) + i * seg_len
            all_index.extend(list(index))

        all_index = all_index[::int(sample_rate_scale)]
        vr.seek(0)
        # guard against an out-of-range last index: swap it for the smallest
        # unused index and re-sort
        if all_index[-1] >= seg_len:
            t = 0
            while t in all_index:
                t += 1
                if t == seg_len:
                    t = int(seg_len / 2)
                    break
            all_index[-1] = t
            all_index.sort()

        buffer = vr.get_batch(all_index).asnumpy()
        return buffer
Code example #18
File: utils.py  Project: nepython/meet2pdf
def extract_frames(video_path,
                   frames_dir,
                   custom_coordinates,
                   start=-1,
                   end=-1,
                   seconds=0.1,
                   meet=True):
    """
    Extract frames from a video using decord's VideoReader
        :param video_path: path of the video
        :param frames_dir: the directory to save the frames
        :param overwrite: to overwrite frames that already exist?
        :param start: start frame
        :param end: end frame
        :param seconds: frame spacing
        :return: count of images saved
    """

    video_path = os.path.normpath(
        video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(
        frames_dir)  # make the paths OS (Windows) compatible

    video_dir, video_filename = os.path.split(
        video_path)  # get the video path and filename from the path

    assert os.path.exists(video_path)  # assert the video file exists

    vidcap = cv2.VideoCapture(video_path)
    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    if fps == 0:
        return False
    seconds = int(seconds * fps)
    frameToStore = None
    same = False  # whether the most recent frame matched the one before it

    try:
        vr = VideoReader(video_path, ctx=gpu(0))  # can set to cpu or gpu
    except:
        vr = VideoReader(video_path, ctx=cpu(0))  # can set to cpu or gpu

    if meet:
        shareScreenCoverage = {"h": float(0.75), "w": float(0.75)}
    else:
        shareScreenCoverage = {"h": float(1), "w": float(1)}
    if start < 0:  # if start isn't specified lets assume 0
        start = 0
    if end < 0:  # if end isn't specified assume the end of the video
        end = len(vr)

    frames_list = list(range(start, end, seconds))
    saved_count = 0
    os.makedirs(os.path.join(frames_dir, video_filename), exist_ok=True)  # ensure the output directory exists
    frames = vr.get_batch(frames_list).asnumpy()

    for index, frame in zip(
            frames_list, frames):  # lets loop through the frames until the end
        save_path = os.path.join(
            frames_dir, video_filename,
            f"frame{saved_count}.jpg")  # create the save path
        newFrame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        h, w, dimension = newFrame.shape
        if meet:
            croppedImageAttributes = {
                "top": int(0.125 * h),
                "bottom": int(0.875 * h),
                "left": int(0),
                "right": int(0.75 * w),
            }
        else:
            croppedImageAttributes = {
                "top": int(custom_coordinates["top"] * h),
                "bottom": int((1 - custom_coordinates["bottom"]) * h),
                "left": int(custom_coordinates["left"] * w),
                "right": int((1 - custom_coordinates["right"]) * w),
            }
        # to crop Google meet slides frame only and ignore the speaker part of screen
        newFrame = newFrame[
            croppedImageAttributes["top"]:croppedImageAttributes["bottom"],
            croppedImageAttributes["left"]:croppedImageAttributes["right"], ]

        # have seen at least 1 frame before.
        if frameToStore is not None:
            # compare new frame with last frame
            same: bool = CheckSimilarity(frameToStore, newFrame)
            # save last frame if last frame is not same as new frame
            if not same:
                cv2.imwrite(save_path,
                            frameToStore)  # save the extracted image
                saved_count += 1  # increment our counter by one
        frameToStore = newFrame

    # save the last frame too if it differed from the previous one
    if frameToStore is not None and not same:
        cv2.imwrite(save_path, frameToStore)  # save the extracted image
        saved_count += 1
    return True
Code example #19
class Processor:
    def __init__(self, model, categories, labeled_dataset=None):
        '''
        https://pytorch.org/hub/pytorch_vision_resnet/
        Classification algorithm based on the ResNet model
        Args:
            model: the trained model
            categories: classification labels
        '''
        self.model = model
        self.categories = categories
        self.model.eval()

    def classify(self, image):
        '''
        Classifies the input image
        Args:
            image: the input image object

        Returns:
            the top-3 class ids and their probabilities (two numpy arrays)
        '''
        # input image must be resized and normalized so that it is the same as the trained model
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(image)
        input_batch = input_tensor.unsqueeze(
            0)  # create a mini-batch as expected by the model

        # move the input and model to GPU for speed if available
        if torch.cuda.is_available():
            input_batch = input_batch.to('cuda')
            self.model.to('cuda')

        # Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
        with torch.no_grad():
            output = self.model(input_batch)

        # The output has unnormalized scores. To get probabilities, you can run a softmax on it.
        probabilities = torch.nn.functional.softmax(output[0], dim=0)

        # Show the 3 most probable classes
        top_prob, top_catid = torch.topk(probabilities, 3)
        return top_catid.cpu().numpy(), top_prob.cpu().numpy()  # move to CPU before converting

    def classify_frames(self, skipping=1):
        '''
        function that can iterate through the video frames and classify each frame.
        Args:
            skipping: number of frames to skip while iterating through the video

        Returns:
            The top 3 labels with the highest probabilities.

        '''
        # transform the frames to numpy format
        frames = self.get_frames(skipping).asnumpy()

        results = dict()  # dictionary for storing the classification results
        for frame in frames:
            labels, prob = self.classify(Image.fromarray(frame))
            for i in range(len(labels)):
                if labels[i] in results:
                    results[labels[i]] = results[labels[i]] + prob[i]
                else:
                    results[labels[i]] = prob[i]

        # sort the dictionary in descending order
        sorted_list = sorted(results.items(), key=lambda x: x[1], reverse=True)
        # only return the (at most) top-3 labels
        return [item[0] for item in sorted_list[:3]]

    def load_video(self, path):
        '''
        https://github.com/dmlc/decord#installation
        https://github.com/dmlc/decord/blob/master/examples/video_reader.ipynb
        A decord wrapper implemented per the instruction
        Load the video as an object
        Args:
            path: the path to the video file

        Returns:
            none
        '''
        self.vr = VideoReader(path, width=320, height=240, ctx=cpu(0))

    def get_frames(self, skipping=1):
        '''
        Get the sampled frames from the input video
        Args:
            skipping: number of frames to skip in when sampling

        Returns:
            the frames sampled from the video
        '''
        return self.vr.get_batch(range(0, len(self.vr) - 1, skipping))

    def get_average_rgb(self):
        '''
        Get the average RGB values for each color channel
        Args:
            None

        Returns:
            average RGB values
        '''
        frames = self.get_frames().asnumpy()
        rgb = np.zeros(3)
        for i in range(3):
            rgb[i] = np.mean(frames[:, :, :, i])

        return rgb
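A hypothetical driver for the Processor above (the model and label list are assumed):

model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
categories = [str(i) for i in range(1000)]  # placeholder ImageNet labels
proc = Processor(model, categories)
proc.load_video('video.mp4')
print(proc.classify_frames(skipping=30))    # top-3 class ids across sampled frames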
Code example #20
def run_data_worker(idx, args, data_queue, safe_gap=10000, nframes=10):
    assert args.interval * nframes < safe_gap
    videos = [i for i in os.listdir(args.data_dir) if i.endswith('.mp4')]
    videos = [os.path.join(args.data_dir, i) for i in videos]

    sub_videos = []
    for i, v in enumerate(videos):
        if i % args.num_data_threads == idx:
            sub_videos.append(v)

    for video in sub_videos:
        txt = glob.glob(video + '_*.txt')[0]
        annos = read_anno_txt(txt)
        vr = VideoReader(video, ctx=cpu(idx))
        vid = os.path.basename(video)
        frame_ts_table = [
            int(vr.get_frame_timestamp(i)[1] * 1000) for i in range(len(vr))
        ]

        frame_ids = []
        t = fid = aid = 0  # three trace pointers

        # O(N+M), N=len(intervals), M=len(vr)
        while aid < len(annos) and \
                t < max(args.interval // 2, annos[aid]['Time'] - safe_gap):
            # Extract negative example
            # NOTE: for simplicity, this implementation drops the segment
            # from the last positive anno to the video end
            while fid < len(vr) and frame_ts_table[fid] < t:
                fid += 1

            if len(frame_ids) < nframes:
                frame_ids.append(fid)
            else:
                frame_ids.pop(0)
                frame_ids.append(fid)

            if len(frame_ids) == nframes:
                frames = [
                    cv2.resize(img[:, :, ::-1], (640, 360))
                    for img in list(vr.get_batch(frame_ids).asnumpy())
                ]
                data_queue.put((frames, 0, vid, t))

            t += args.interval

            if t >= annos[aid]['Time'] - safe_gap:
                # Extract positive example
                frame_ids.clear()
                for i in range(nframes - 1, -1, -1):
                    # i = 9, 8, ..., 0 when nframes = 10
                    t = annos[aid]['Time'] - args.interval * i
                    while frame_ts_table[fid] < t:
                        fid += 1
                    frame_ids.append(fid)

                frames = [
                    cv2.resize(img[:, :, ::-1], (640, 360))
                    for img in list(vr.get_batch(frame_ids).asnumpy())
                ]
                data_queue.put((frames, 1, vid, t))
                frame_ids.clear()

                t = annos[aid]['Time'] + safe_gap
                while aid < len(annos) and t >= annos[aid]['Time'] - safe_gap:
                    aid += 1
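get_frame_timestamp returns a frame's (start, end) time in seconds, which the table above converts to milliseconds; a quick check (the file name is assumed):

vr = VideoReader('video.mp4', ctx=cpu(0))
start_s, end_s = vr.get_frame_timestamp(0)
print(int(end_s * 1000))  # end time of frame 0 in milliseconds, as in frame_ts_table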