def __load_set(self, set_file):
        """
        Parse a set file and populate self.video_list, self.text_list and
        self.gt_list.

        Each line of `set_file` holds three space-separated paths (relative
        to self.root): the video file, a text JSON file, and a ground-truth
        file loadable with np.load.
        """
        with open(set_file) as f:
            lines = f.readlines()

        video_list = []
        text_list = []
        gt_list = []
        for line in lines:
            line = line.strip('\n')
            segs = line.split(' ')
            print('=>Load Video', segs)
            assert (len(segs) == 3)
            # make every path absolute w.r.t. the dataset root
            segs = [os.path.join(self.root, seg) for seg in segs]

            # keep only the path; the reader is opened here just to report the
            # frame count (readers are re-created on demand in __getitem__)
            video_list.append(segs[0])
            cap = FFMPEG_VideoReader(segs[0])
            cap.initialize()
            #video_list.append(cap)
            print('Video: frames({})'.format(int(cap.nframes)))
            # Load text json file
            text = json.load(open(segs[1]))
            # Load GT with np.load (despite the original "json" wording this
            # is a NumPy file -- TODO confirm the .npy/.npz format)
            gt = np.load(open(segs[2]))
            print('Gt : frames({})'.format(len(gt)))
            text_list.append(text)
            gt_list.append(gt)

        self.video_list = video_list
        self.text_list = text_list
        self.gt_list = gt_list
Esempio n. 2
0
def video_uniform_sample_n_frames_old(video_path, n_samples, max_dim):
    """
    Sample only n frames from the video.

    Deprecated: raises immediately until a resizing-type argument is added;
    the body below is kept for reference but is currently unreachable.
    """

    raise Exception('Needs to add argument about resizing type')

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = cap.fps
    n_frames = cap.nframes
    duration = cap.duration
    # uniform step, in seconds, between consecutive samples
    step = duration / n_samples

    frames = []
    for i in range(n_samples):
        time_sec = i * step
        frame = cap.get_frame(time_sec)
        # resize frame to fit in the array, it's going to be used by caffe anyway
        frame = image_utils.resize_keep_aspect_ratio_max_dim(frame, max_dim)
        # frame encoded as uint and values are from 0-255
        # but caffe needs float32 and values from 0-1
        frame = frame.astype('float32') / float(255)
        frames.append(frame)

    # very important, or we'd have memory leakage; use close() (as the other
    # helpers in this module do) instead of invoking __del__ directly
    cap.close()

    return frames
Esempio n. 3
0
def __play_video_ffmpeg(video_path, caption, window_name='window', speed=1):
    """
    Play a video in an OpenCV window, drawing `caption` on each frame.

    Controls: Esc quits, space toggles pause/resume. `speed` is the number
    of source frames advanced per displayed frame.
    """
    is_playing = True

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = float(cap.fps)
    n_frames = cap.nframes

    index = 0
    while True:
        if is_playing:
            time_sec = index / fps
            # increment by speed
            index += speed
            frame = cap.get_frame(time_sec)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape

            # resize the frame to a fixed 800-px width, keeping aspect ratio
            f_width = 800
            resize_factor = float(f_width) / frame_size[1]
            f_height = int(frame_size[0] * resize_factor)
            frame_size = (f_width, f_height)
            frame = cv2.resize(src=frame,
                               dsize=frame_size,
                               interpolation=cv2.INTER_AREA)

            # write caption on a filled background bar near the bottom.
            # Thickness -1 means "filled"; it replaces the OpenCV 2-only
            # constant cv2.cv.CV_FILLED (removed in OpenCV 3+) and matches
            # the sibling player function in this module.
            top = int((f_height * 0.9))
            text_width = cv2.getTextSize(caption, font, 1.2, 1)[0][0] + 20
            cv2.rectangle(frame, (0, top - 22), (text_width, top + 10),
                          black_color, -1)
            cv2.putText(img=frame,
                        text=caption,
                        org=(10, top),
                        fontFace=font,
                        fontScale=1.2,
                        color=white_color,
                        thickness=1,
                        lineType=8)

            # show the frame
            cv2.imshow(window_name, frame)

            e = cv2.waitKey(2)
            if e == 27:
                break
            if e == 32:
                is_playing = False
                print('Pause video')
            # If the number of captured frames is equal to the total number of frames,we stop
            if index >= n_frames:
                break
        else:
            # toggle pause with 'space'
            e = cv2.waitKey(2)
            if e == 32:
                is_playing = True
                print('Play video')
Esempio n. 4
0
def get_regions(video_path, annot, resize_type, verbose=False):
    """
    Get the frames whose numbers are given in the "annot" dictionary. Then,
    for each frame, get the regions as specified in the "annot" dictionary.
    Finally, return these regions as a float32 array with values in [0, 1].

    annot maps a 1-based frame number -> list of region records, where
    record[1:5] is the (x1, y1, x2, y2) bbox at half scale (see
    bbox_resize_factor). NOTE(review): confirm record layout against callers.
    """

    assert resize_type in ['resize', 'resize_crop', 'resize_crop_scaled']

    # pick the resize strategy once, outside the loops
    resize_function = None
    if resize_type == 'resize':
        resize_function = image_utils.resize_frame
    elif resize_type == 'resize_crop':
        resize_function = image_utils.resize_crop
    elif resize_type == 'resize_crop_scaled':
        resize_function = image_utils.resize_crop_scaled

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = float(cap.fps)
    n_frames = cap.nframes
    duration = cap.duration
    # Python 3: dict.iteritems() no longer exists; use values() directly
    n_regions = sum(len(v) for v in annot.values())

    frame_size = 224
    bbox_resize_factor = 2
    regions = np.zeros(shape=(n_regions, frame_size, frame_size, 3),
                       dtype='float32')
    region_idx = -1

    for frame_num in annot.keys():

        if (region_idx + 1) % 100 == 0 and verbose:
            print(' ... reading region %d/%d' % (region_idx + 1, n_regions))

        # get the frame (frame numbers are 1-based)
        i = frame_num - 1
        time_sec = i / fps
        frame = cap.get_frame(time_sec)

        # get the regions (resized) from the frame
        regions_info = annot[frame_num]
        for region_info in regions_info:
            region_idx += 1
            bbox = region_info[1:5]
            # np.int was removed in NumPy 1.24; the builtin int is equivalent
            bbox = np.multiply(bbox, bbox_resize_factor).astype(int)
            x1, y1, x2, y2 = bbox
            region = frame[y1:y2, x1:x2]
            # resize frame to fit in the array, it's going to be used by caffe anyway
            region = resize_function(region)
            # frame encoded as uint and values are from 0-255, but caffe needs float32 and values from 0-1
            region = region.astype('float32') / float(255)
            regions[region_idx] = region

    # very important, or we'd have memory leakage; close() releases the
    # reader properly instead of invoking __del__ by hand
    cap.close()

    return regions
Esempio n. 5
0
def get_video_info(video_path):
    """Return (fps, n_frames, duration) for the video at `video_path`."""
    reader = FFMPEG_VideoReader(video_path, False)
    reader.initialize()
    info = (reader.fps, reader.nframes, reader.duration)
    reader.close()
    del reader
    return info
Esempio n. 6
0
def video_save_frames_specific_duration(action_num,
                                        video_num,
                                        video_path,
                                        frames_root_pathes,
                                        start_stop_sec,
                                        image_name_format,
                                        verbose=False):
    """
    Extract the frames of several (start, stop) second intervals from one
    video and save them as 224x224 cropped images.

    frames_root_pathes[i] receives the frames of interval start_stop_sec[i];
    file names are produced with `image_name_format % frame_index`.
    `action_num` and `video_num` are used only for verbose logging.
    """
    assert len(frames_root_pathes) == len(start_stop_sec)

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = float(cap.fps)
    duration_sec = cap.duration
    img_dim = 224

    start_stop_sec = np.array(start_stop_sec)

    for i, s_s_sec in enumerate(start_stop_sec):
        start_sec, stop_sec = s_s_sec
        frame_root_path = frames_root_pathes[i]

        # offset of starting/stopping the action
        sec_offset = 0.25

        start_idx = int((start_sec + sec_offset) * fps)
        stop_idx = int((stop_sec + sec_offset) * fps) + 1

        if verbose:
            print('action, video: %d, %d' % (action_num, video_num))
            print('%d/%d' % (start_sec, stop_sec))
            print('%d/%d' % (start_idx, stop_idx))

        for idx_frame in range(start_idx, stop_idx):
            time_sec = idx_frame / fps
            if verbose and idx_frame % 100 == 0:
                print('... time_sec, frame: %d/%d' % (time_sec, idx_frame))

            frame = cap.get_frame(time_sec)
            # reader yields RGB; cv2.imwrite expects BGR
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            frame = image_utils.resize_crop(frame,
                                            target_width=img_dim,
                                            target_height=img_dim)

            image_name = image_name_format % (idx_frame, )
            frame_path = os.path.join(frame_root_path, image_name)
            cv2.imwrite(frame_path, frame)

    # very important, or we'd have memory leakage; close() releases the
    # reader properly instead of invoking __del__ by hand
    cap.close()
Esempio n. 7
0
def video_uniform_sample_and_save_old(spf,
                                      video_path,
                                      frames_path,
                                      image_name_format,
                                      resize_type,
                                      verbose=False):
    """
    Sample one frame every `spf` seconds from the video and save each sample
    as an image under `frames_path` (named via `image_name_format % num`).

    Returns (fps, n_frames, duration) of the source video.
    """
    if resize_type is not None:
        assert resize_type in ['resize', 'resize_crop', 'resize_crop_scaled']

    # map the resize-type name to its resize callable (None -> no resizing)
    resizers = {
        'resize': image_utils.resize_frame,
        'resize_crop': image_utils.resize_crop,
        'resize_crop_scaled': image_utils.resize_crop_scaled,
    }
    resize_function = resizers.get(resize_type)

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = cap.fps
    n_frames = cap.nframes
    duration = cap.duration

    # take at least one frame even when the video is shorter than spf
    n_samples = max(1, int(duration / float(spf)))

    for idx in range(n_samples):
        num = idx + 1
        if verbose:
            print(' ... reading frame %d/%d' % (num, n_samples))
        frame = cap.get_frame(idx * spf)

        if resize_type is not None:
            # resize frame to fit in the array, it's going to be used by caffe anyway
            frame = resize_function(frame)

        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        image_name = image_name_format % (num, )
        cv2.imwrite(os.path.join(frames_path, image_name), frame)

    # very important, or we'd have memory leakage
    cap.close()

    return fps, n_frames, duration
def save_frames_from_vid(video_name, category_id, train_or_test, row=0):
    """
    Decode every frame of `video_name` and save each one as a JPEG under
    trafficdb/eval_<row>/<train_or_test>/<category_id>/. Returns True.
    """
    make_dir_structure(row)

    # Initialize FFMPEG_VideoReader
    reader = FFMPEG_VideoReader(filename=video_name)
    reader.initialize()

    vid = os.path.split(video_name)[1]
    out_dir = os.path.join('trafficdb', 'eval_' + str(row), train_or_test,
                           str(category_id))
    for frame_idx in range(reader.nframes):
        frame = reader.read_frame()
        frame_name = '{}_{}'.format(vid, frame_idx)
        imsave(os.path.join(out_dir, frame_name + '.jpg'), frame)

    return True
def get_frames_from_vid(video_name, category_id):
    """
    Decode every frame of `video_name` into a single array.

    Returns (img_stack, cat_stack): img_stack has shape
    (n_frames, size[0], size[1], depth) and float dtype; cat_stack is an
    (n_frames, 1) column filled with `category_id`.

    NOTE(review): the reshape assumes frames are laid out as
    (size[0], size[1], depth) -- confirm against the reader's frame format.
    """
    # Initialize FFMPEG_VideoReader
    reader = FFMPEG_VideoReader(filename=video_name)
    reader.initialize()
    frame_shape = (reader.size[0], reader.size[1], reader.depth)

    # Collect frames in a list and stack once at the end. The previous code
    # called np.vstack inside the loop, copying the whole accumulated array
    # on every iteration (O(n^2)); a single stack is O(n).
    frames = [reader.read_frame().reshape(frame_shape)
              for _ in range(reader.nframes)]

    if frames:
        # cast to float64 to keep the dtype the old zeros+vstack version produced
        img_stack = np.stack(frames).astype(np.float64)
    else:
        img_stack = np.zeros((0,) + frame_shape)

    cat_stack = np.ones((len(img_stack), 1)) * category_id

    return img_stack, cat_stack
Esempio n. 10
0
def video_uniform_sampling(spf,
                           video_path,
                           resize_type,
                           is_local,
                           verbose=False):
    """
    Uniformly sample one frame every `spf` seconds from the video.

    Returns (frames, fps, n_frames, duration): frames is a float32 array of
    shape (n_samples, 224, 224, 3) with values in [0, 1]. `is_local` is
    accepted for interface compatibility but is not used in this body.
    """
    assert resize_type in ['resize', 'resize_crop', 'resize_crop_scaled']

    # pick the resize strategy once, outside the loop
    resize_function = None
    if resize_type == 'resize':
        resize_function = image_utils.resize_frame
    elif resize_type == 'resize_crop':
        resize_function = image_utils.resize_crop
    elif resize_type == 'resize_crop_scaled':
        resize_function = image_utils.resize_crop_scaled

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = cap.fps
    n_frames = cap.nframes
    duration = cap.duration
    n_samples = int(duration / float(spf))

    # check if no samples because the video duration is less than spf
    # then at least, get 1 frame of the video
    if n_samples == 0:
        n_samples = 1

    frame_size = 224
    frames = np.zeros(shape=(n_samples, frame_size, frame_size, 3),
                      dtype='float32')
    for i in range(n_samples):
        num = i + 1
        if num % 100 == 0 and verbose:
            print(' ... reading frame %d/%d' % (num, n_samples))
        time_sec = i * spf
        frame = cap.get_frame(time_sec)
        # resize frame to fit in the array, it's going to be used by caffe anyway
        frame = resize_function(frame)
        # frame encoded as uint and values are from 0-255
        # but caffe needs float32 and values from 0-1
        frame = frame.astype('float32') / float(255)
        frames[i] = frame

    # very important, or we'd have memory leakage; use close() (as the other
    # helpers in this module do) instead of invoking __del__ directly
    cap.close()

    return frames, fps, n_frames, duration
    def __getitem__(self, index):
        """
        Return (imgs, text, gt) for the global frame position `index`.

        `index` addresses the concatenation of all videos' frames; self.sums
        holds the cumulative per-video boundaries used to locate the video.
        imgs is a stacked tensor of self.multi_frame frames when
        self.prod_Img is set (otherwise an empty list); text is a
        newline-joined transcript window (or ' ' when empty).
        """
        # Find the video first: histogramming `index` against the cumulative
        # boundaries puts it into exactly one bin (asserted below).
        vid = np.histogram(index, self.sums)
        assert (np.sum(vid[0]) == 1)
        vid = np.where(vid[0] > 0)[0][0]

        v_fmax = len(self.gt_list[vid])
        # frame position within the selected video
        vframe = index - self.sums[vid]
        #vframes = [min(vframe + i, len(self.gt_list[vid])- 1) for i in np.arange(0, 1 +10*self.multi_frame, 10)]
        #vframes = [min(vframe, len(self.gt_list[vid])- 1)]
        #cap = self.video_list[vid]

        imgs = []
        if self.prod_Img:

            # open the reader per item; only paths are stored in video_list
            cap = FFMPEG_VideoReader(self.video_list[vid])
            cap.initialize()

            for i in range(self.multi_frame):
                if i == 0:
                    # seek to the requested frame by timestamp
                    img = cap.get_frame(vframe / cap.fps)
                else:
                    # then take one frame every 10 (skip 9, read 1)
                    cap.skip_frames(n=9)
                    img = cap.read_frame()

                img = Image.fromarray(img)
                if self.transform is not None:
                    img = self.transform(img)
                imgs.append(img)
            '''
            for v in vframes:
                #cap = cv2.VideoCapture(self.video_list[vid])
                #assert cap.isOpened() == True, 'The Video cannot be read:{}'.format(self.video_list[vid])
                

                #cap.set(1, v)
                #ret, frame= cap.read()
                img = cap.get_frame(v)
                #assert ret == True, 'Cannot Load this frame{}:{}'.format(self.video_list[vid], v)
                #cv2_im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                if self.transform is not None:
                    img = self.transform(img)
                imgs.append(img)    
            '''
            # stack the per-frame tensors along a new leading dimension
            imgs = [img.unsqueeze(0) for img in imgs]
            imgs = torch.cat(imgs, 0)

        text = []
        if self.prod_Text:
            # transcript window [vframe+delay, vframe+delay+window),
            # both ends clamped to the transcript length
            text = self.text_list[vid][
                min(vframe + self.text_delay, len(self.text_list[vid])
                    ):min(vframe + self.text_window +
                          self.text_delay, len(self.text_list[vid]))]
            text = '\n'.join(text)

        gt = self.gt_list[vid][vframe]

        # never return an empty string (downstream code expects non-empty text)
        if (len(text) == 0):
            text = ' '
        #text = text_util.lineToTensor(text)

        return imgs, text, gt
Esempio n. 12
0
def play_video_specific_frames_matplotlib(video_path, seconds, caption=''):
    """
    Play video. Show only frames at given seconds.

    Matplotlib-based variant: renders frames with plt.imshow and listens for
    key presses on the figure (Escape quits, space toggles pause/resume).
    """

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = float(cap.fps)
    speed = 1
    sec_idx = -1
    n_secs = len(seconds)
    window_name = 'Window_Title'

    plt.figure(window_name)
    plt.ion()
    plt.axis('off')

    # playback flags are module-level globals so the nested key handler
    # (a matplotlib callback) can mutate them
    global is_exit
    global is_playing

    is_exit = False
    is_playing = True

    def __key_press(event):
        # matplotlib 'key_press_event' callback
        event_key = event.key
        if event_key == 'escape':
            global is_exit
            is_exit = True
        elif event_key == ' ':
            global is_playing
            is_playing = not is_playing

    fig = plt.gcf()
    fig.canvas.mpl_connect('key_press_event', __key_press)

    while True:
        if is_exit:
            break
        if is_playing:
            sec_idx += 1

            # finish condition
            if sec_idx >= n_secs:
                break

            second = seconds[sec_idx]
            frame = cap.get_frame(second)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape

            # resize the frame to a fixed 800-px width, keeping aspect ratio
            f_width = 800
            resize_factor = float(f_width) / frame_size[1]
            f_height = int(frame_size[0] * resize_factor)
            frame_size = (f_width, f_height)
            frame = cv2.resize(src=frame,
                               dsize=frame_size,
                               interpolation=cv2.INTER_AREA)

            # write caption
            plt.title(caption)

            # show the frame
            # NOTE(review): this channel flip undoes the BGR2RGB conversion
            # above -- presumably intentional for matplotlib; confirm colors
            frame = frame[:, :, (2, 1, 0)]
            plt.imshow(frame)

        # in both case, pause figure to capture key press
        plt.pause(0.01)

    plt.close()
Esempio n. 13
0
def play_video_specific_frames(video_path, seconds, caption=''):
    """
    Play a video in an OpenCV window, showing only the frames at the given
    timestamps (in seconds). Esc quits; space toggles pause/resume.
    """

    reader = FFMPEG_VideoReader(video_path, False)
    reader.initialize()
    fps = float(reader.fps)
    speed = 1
    n_secs = len(seconds)
    window_name = 'Window_Title'

    playing = True
    cursor = -1
    while True:
        if not playing:
            # paused: wait for 'space' to resume
            key = cv2.waitKey(2)
            if key == 32:
                playing = True
                print('Play video')
            continue

        cursor += 1
        if cursor >= n_secs:
            # all requested timestamps shown
            break

        frame = reader.get_frame(seconds[cursor])
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        src_h, src_w = frame.shape[0], frame.shape[1]

        # scale to a fixed 800-px display width, preserving aspect ratio
        new_w = 800
        new_h = int(src_h * (float(new_w) / src_w))
        frame = cv2.resize(src=frame,
                           dsize=(new_w, new_h),
                           interpolation=cv2.INTER_AREA)

        # draw the caption on a filled bar near the bottom of the frame
        baseline = int((new_h * 0.9))
        band_w = cv2.getTextSize(caption, font, 1.2, 1)[0][0] + 20
        cv2.rectangle(frame, (0, baseline - 22), (band_w, baseline + 10),
                      black_color, -1)
        cv2.putText(img=frame,
                    text=caption,
                    org=(10, baseline),
                    fontFace=font,
                    fontScale=1.2,
                    color=white_color,
                    thickness=1,
                    lineType=8)

        cv2.imshow(window_name, frame)

        key = cv2.waitKey(1)
        if key == 27:
            break
        if key == 32:
            playing = False
            print('Pause video')
Esempio n. 14
0
	# end

	if True:
		tensorInputFirst.cpu()
		tensorInputSecond.cpu()
		tensorOutput.cpu()
	# end
#end

# output tensor reused across process() calls
tensorOutput = torch.FloatTensor()

if arguments_strVideo and arguments_strVideoOut:
	# Process video: synthesize one in-between frame for every consecutive
	# pair of source frames, writing the result at twice the input fps
	reader = FFMPEG_VideoReader(arguments_strVideo, False)
	writer = FFMPEG_VideoWriter(arguments_strVideoOut, reader.size, reader.fps*2)
	reader.initialize()
	nextFrame = reader.read_frame()
	for x in range(0, reader.nframes):
		firstFrame = nextFrame
		nextFrame = reader.read_frame()
		# [:,:,::-1] flips the channel order; rollaxis moves channels first (CHW); /255 scales to [0,1]
		tensorInputFirst = torch.FloatTensor(numpy.rollaxis(firstFrame[:,:,::-1], 2, 0) / 255.0)
		tensorInputSecond = torch.FloatTensor(numpy.rollaxis(nextFrame[:,:,::-1], 2, 0) / 255.0)
		process(tensorInputFirst, tensorInputSecond, tensorOutput)
		# write the original frame, then the interpolated frame (converted
		# back to HWC uint8 with the channel order restored)
		writer.write_frame(firstFrame)
		writer.write_frame((numpy.rollaxis(tensorOutput.clamp(0.0, 1.0).numpy(), 0, 3)[:,:,::-1] * 255.0).astype(numpy.uint8))
	#end
	# flush the final source frame
	writer.write_frame(nextFrame)
	writer.close()
else:
	# Process image
	tensorInputFirst = torch.FloatTensor(numpy.rollaxis(numpy.asarray(PIL.Image.open(arguments_strFirst))[:,:,::-1], 2, 0).astype(numpy.float32) / 255.0)
Esempio n. 15
0
#!/usr/bin/python

import sys
import os
import numpy as np
import cv2
import datetime
from moviepy.video.io.ffmpeg_reader import FFMPEG_VideoReader

#sys.argv=['','359_CTMxCTF_6.mp4',2]

# open the same video twice: OpenCV for metadata and sequential reads,
# moviepy's ffmpeg reader for frame access
cap = cv2.VideoCapture(sys.argv[1])

rec = FFMPEG_VideoReader(sys.argv[1], True)
rec.initialize()

# numeric cv2.CAP_PROP_* ids: 3 = frame width, 4 = frame height,
# 7 = frame count, 5 = fps
frameWidth = int(cap.get(3))
frameHeight = int(cap.get(4))
frameNum = int(cap.get(7))
fps = int(cap.get(5))

# MOG2 background subtractor (shadow detection disabled)
fgbg = cv2.createBackgroundSubtractorMOG2(detectShadows=False)

# accumulator for per-frame difference values, filled in the loop below
FrameDiff = np.array([])

# sampling interval (frames) taken from the command line
lapse = int(sys.argv[2])

i = 0

while (i < frameNum):
    ret, frame = cap.read()