def __load_set(self, set_file):
    with open(set_file) as f:
        lines = f.readlines()

    video_list = []
    text_list = []
    gt_list = []

    for line in lines:
        line = line.strip('\n')
        segs = line.split(' ')
        print('=> Load Video', segs)
        assert len(segs) == 3
        segs = [os.path.join(self.root, seg) for seg in segs]

        video_list.append(segs[0])
        cap = FFMPEG_VideoReader(segs[0])
        cap.initialize()
        #video_list.append(cap)
        print('Video: frames({})'.format(int(cap.nframes)))

        # Load text json file
        text = json.load(open(segs[1]))

        # Load GT numpy file
        gt = np.load(segs[2])
        print('Gt: frames({})'.format(len(gt)))

        text_list.append(text)
        gt_list.append(gt)

    self.video_list = video_list
    self.text_list = text_list
    self.gt_list = gt_list
def video_uniform_sample_n_frames_old(video_path, n_samples, max_dim):
    """
    Sample only n frames from the video.
    """
    raise Exception('Needs to add argument about resizing type')

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = cap.fps
    n_frames = cap.nframes
    duration = cap.duration
    step = duration / n_samples

    frames = []
    for i in range(n_samples):
        time_sec = i * step
        frame = cap.get_frame(time_sec)
        # resize frame to fit in the array, it's going to be used by caffe anyway
        frame = image_utils.resize_keep_aspect_ratio_max_dim(frame, max_dim)
        # frame is encoded as uint8 with values 0-255,
        # but caffe needs float32 with values 0-1
        frame = frame.astype('float32') / 255.0
        frames.append(frame)

    # very important, or we'd have memory leakage
    cap.close()
    return frames
def __play_video_ffmpeg(video_path, caption, window_name='window', speed=1):
    is_playing = True
    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = float(cap.fps)
    n_frames = cap.nframes
    index = 0

    while True:
        if is_playing:
            time_sec = index / fps
            # increment by speed
            index += speed
            frame = cap.get_frame(time_sec)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape

            # resize the frame
            f_width = 800
            resize_factor = float(f_width) / frame_size[1]
            f_height = int(frame_size[0] * resize_factor)
            frame_size = (f_width, f_height)
            frame = cv2.resize(src=frame, dsize=frame_size, interpolation=cv2.INTER_AREA)

            # write caption on frame
            top = int(f_height * 0.9)
            text_width = cv2.getTextSize(caption, font, 1.2, 1)[0][0] + 20
            cv2.rectangle(frame, (0, top - 22), (text_width, top + 10), black_color, cv2.FILLED)
            cv2.putText(img=frame, text=caption, org=(10, top), fontFace=font, fontScale=1.2, color=white_color, thickness=1, lineType=8)

            # show the frame
            cv2.imshow(window_name, frame)
            e = cv2.waitKey(2)
            if e == 27:
                break
            if e == 32:
                is_playing = False
                print('Pause video')
            # if the number of captured frames equals the total number of frames, we stop
            if index >= n_frames:
                break
        else:
            # toggle pause with 'space'
            e = cv2.waitKey(2)
            if e == 32:
                is_playing = True
                print('Play video')
def get_regions(video_path, annot, resize_type, verbose=False):
    """
    Get the frames whose numbers are given in the "annot" dictionary.
    Then, for each frame, get the regions as specified in the "annot" dictionary.
    Finally, return these regions.
    """
    assert resize_type in ['resize', 'resize_crop', 'resize_crop_scaled']

    resize_function = None
    if resize_type == 'resize':
        resize_function = image_utils.resize_frame
    elif resize_type == 'resize_crop':
        resize_function = image_utils.resize_crop
    elif resize_type == 'resize_crop_scaled':
        resize_function = image_utils.resize_crop_scaled

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = float(cap.fps)
    n_frames = cap.nframes
    duration = cap.duration

    n_regions = sum([len(v) for k, v in annot.items()])
    frame_size = 224
    bbox_resize_factor = 2
    regions = np.zeros(shape=(n_regions, frame_size, frame_size, 3), dtype='float32')
    region_idx = -1
    frame_nums = annot.keys()

    for frame_num in frame_nums:
        if (region_idx + 1) % 100 == 0 and verbose:
            print(' ... reading region %d/%d' % (region_idx + 1, n_regions))

        # get the frame
        i = frame_num - 1
        time_sec = i / fps
        frame = cap.get_frame(time_sec)

        # get the regions (resized) from the frame
        regions_info = annot[frame_num]
        for region_info in regions_info:
            region_idx += 1
            bbox = region_info[1:5]
            bbox = np.multiply(bbox, bbox_resize_factor).astype(int)
            x1, y1, x2, y2 = bbox
            region = frame[y1:y2, x1:x2]
            # resize region to fit in the array, it's going to be used by caffe anyway
            region = resize_function(region)
            # region is encoded as uint8 with values 0-255, but caffe needs float32 with values 0-1
            region = region.astype('float32') / 255.0
            regions[region_idx] = region

    # very important, or we'd have memory leakage
    cap.close()
    return regions
def get_video_info(video_path):
    # video_fps, video_n_frames, video_duration = video_utils.
    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = cap.fps
    n_frames = cap.nframes
    duration = cap.duration
    cap.close()
    del cap
    return fps, n_frames, duration
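# A minimal usage sketch for get_video_info above, not taken from any of the
# original sources; the video path below is a hypothetical placeholder.
if __name__ == '__main__':
    fps, n_frames, duration = get_video_info('/path/to/video.mp4')
    print('fps: {}, frames: {}, duration: {} sec'.format(fps, n_frames, duration))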
def video_save_frames_specific_duration(action_num, video_num, video_path, frames_root_pathes, start_stop_sec, image_name_format, verbose=False):
    assert len(frames_root_pathes) == len(start_stop_sec)

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = float(cap.fps)
    duration_sec = cap.duration
    img_dim = 224
    start_stop_sec = np.array(start_stop_sec)

    for i, s_s_sec in enumerate(start_stop_sec):
        start_sec, stop_sec = s_s_sec
        frame_root_path = frames_root_pathes[i]

        # offset of starting/stopping the action
        sec_offset = 0.25
        start_idx = int((start_sec + sec_offset) * fps)
        stop_idx = int((stop_sec + sec_offset) * fps) + 1

        if verbose:
            print('action, video: %d, %d' % (action_num, video_num))
            print('%d/%d' % (start_sec, stop_sec))
            print('%d/%d' % (start_idx, stop_idx))

        for idx_frame in range(start_idx, stop_idx):
            time_sec = idx_frame / fps
            if verbose and idx_frame % 100 == 0:
                print('... time_sec, frame: %d/%d' % (time_sec, idx_frame))
            frame = cap.get_frame(time_sec)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            frame = image_utils.resize_crop(frame, target_width=img_dim, target_height=img_dim)
            image_name = image_name_format % (idx_frame,)
            frame_path = os.path.join(frame_root_path, image_name)
            cv2.imwrite(frame_path, frame)

    # very important, or we'd have memory leakage
    cap.close()
def video_uniform_sample_and_save_old(spf, video_path, frames_path, image_name_format, resize_type, verbose=False):
    if resize_type is not None:
        assert resize_type in ['resize', 'resize_crop', 'resize_crop_scaled']

    resize_function = None
    if resize_type == 'resize':
        resize_function = image_utils.resize_frame
    elif resize_type == 'resize_crop':
        resize_function = image_utils.resize_crop
    elif resize_type == 'resize_crop_scaled':
        resize_function = image_utils.resize_crop_scaled

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = cap.fps
    n_frames = cap.nframes
    duration = cap.duration
    n_samples = int(duration / float(spf))

    # if there are no samples because the video duration is less than spf,
    # then at least get 1 frame of the video
    if n_samples == 0:
        n_samples = 1

    for i in range(n_samples):
        num = i + 1
        if verbose:
            print(' ... reading frame %d/%d' % (num, n_samples))
        time_sec = i * spf
        frame = cap.get_frame(time_sec)
        if resize_type is not None:
            # resize frame to fit in the array, it's going to be used by caffe anyway
            frame = resize_function(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        image_name = image_name_format % (num,)
        frame_path = os.path.join(frames_path, image_name)
        cv2.imwrite(frame_path, frame)

    # very important, or we'd have memory leakage
    cap.close()
    return fps, n_frames, duration
def save_frames_from_vid(video_name, category_id, train_or_test, row=0):
    make_dir_structure(row)
    # Initialize FFMPEG_VideoReader
    fvr = FFMPEG_VideoReader(filename=video_name)
    fvr.initialize()
    vid = os.path.split(video_name)[1]
    for i in range(0, fvr.nframes):
        frame_name = vid + '_' + str(i)
        frame = fvr.read_frame()
        imsave(os.path.join('trafficdb', 'eval_' + str(row), train_or_test, str(category_id), frame_name + '.jpg'), frame)
    return True
def get_frames_from_vid(video_name, category_id):
    # Initialize FFMPEG_VideoReader
    fvr = FFMPEG_VideoReader(filename=video_name)
    fvr.initialize()
    # fvr.size is (width, height), while frames are returned as (height, width, depth)
    shape_for_stack = (1, fvr.size[1], fvr.size[0], fvr.depth)
    img_stack = np.zeros(shape_for_stack)
    for i in range(0, fvr.nframes):
        frame = fvr.read_frame()
        frame = frame.reshape(shape_for_stack)
        img_stack = np.vstack((img_stack, frame))
    # drop the all-zeros frame used to seed the stack
    img_stack = img_stack[1:]
    cat_stack = np.ones((len(img_stack), 1)) * category_id
    return img_stack, cat_stack
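# get_frames_from_vid above grows the array with np.vstack on every iteration,
# which copies the whole stack each time. A hedged sketch of a preallocated
# alternative (an assumption, not part of the original code):
def get_frames_from_vid_preallocated(video_name, category_id):
    fvr = FFMPEG_VideoReader(filename=video_name)
    fvr.initialize()
    # fvr.size is (width, height); read_frame() returns (height, width, depth)
    img_stack = np.zeros((fvr.nframes, fvr.size[1], fvr.size[0], fvr.depth))
    for i in range(fvr.nframes):
        img_stack[i] = fvr.read_frame()
    cat_stack = np.ones((len(img_stack), 1)) * category_id
    return img_stack, cat_stack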
def video_uniform_sampling(spf, video_path, resize_type, is_local, verbose=False):
    assert resize_type in ['resize', 'resize_crop', 'resize_crop_scaled']

    resize_function = None
    if resize_type == 'resize':
        resize_function = image_utils.resize_frame
    elif resize_type == 'resize_crop':
        resize_function = image_utils.resize_crop
    elif resize_type == 'resize_crop_scaled':
        resize_function = image_utils.resize_crop_scaled

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = cap.fps
    n_frames = cap.nframes
    duration = cap.duration
    n_samples = int(duration / float(spf))

    # if there are no samples because the video duration is less than spf,
    # then at least get 1 frame of the video
    if n_samples == 0:
        n_samples = 1

    frame_size = 224
    frames = np.zeros(shape=(n_samples, frame_size, frame_size, 3), dtype='float32')

    for i in range(n_samples):
        num = i + 1
        if num % 100 == 0 and verbose:
            print(' ... reading frame %d/%d' % (num, n_samples))
        time_sec = i * spf
        frame = cap.get_frame(time_sec)
        # resize frame to fit in the array, it's going to be used by caffe anyway
        frame = resize_function(frame)
        # frame is encoded as uint8 with values 0-255,
        # but caffe needs float32 with values 0-1
        frame = frame.astype('float32') / 255.0
        frames[i] = frame

    # very important, or we'd have memory leakage
    cap.close()
    return frames, fps, n_frames, duration
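# A minimal usage sketch for video_uniform_sampling above, not from the
# original sources; the path is a hypothetical placeholder. Judging from the
# code, spf appears to be the sampling interval in seconds per frame.
if __name__ == '__main__':
    frames, fps, n_frames, duration = video_uniform_sampling(
        spf=1.0, video_path='/path/to/video.mp4', resize_type='resize_crop', is_local=True)
    print(frames.shape)  # (n_samples, 224, 224, 3)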
def __getitem__(self, index):
    # Find the video first.
    vid = np.histogram(index, self.sums)
    assert np.sum(vid[0]) == 1
    vid = np.where(vid[0] > 0)[0][0]
    v_fmax = len(self.gt_list[vid])
    vframe = index - self.sums[vid]
    #vframes = [min(vframe + i, len(self.gt_list[vid]) - 1) for i in np.arange(0, 1 + 10 * self.multi_frame, 10)]
    #vframes = [min(vframe, len(self.gt_list[vid]) - 1)]
    #cap = self.video_list[vid]

    imgs = []
    if self.prod_Img:
        cap = FFMPEG_VideoReader(self.video_list[vid])
        cap.initialize()
        for i in range(self.multi_frame):
            if i == 0:
                img = cap.get_frame(vframe / cap.fps)
            else:
                cap.skip_frames(n=9)
                img = cap.read_frame()
            img = Image.fromarray(img)
            if self.transform is not None:
                img = self.transform(img)
            imgs.append(img)
        '''
        for v in vframes:
            #cap = cv2.VideoCapture(self.video_list[vid])
            #assert cap.isOpened() == True, 'The Video cannot be read:{}'.format(self.video_list[vid])
            #cap.set(1, v)
            #ret, frame = cap.read()
            img = cap.get_frame(v)
            #assert ret == True, 'Cannot Load this frame{}:{}'.format(self.video_list[vid], v)
            #cv2_im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img)
            if self.transform is not None:
                img = self.transform(img)
            imgs.append(img)
        '''
        imgs = [img.unsqueeze(0) for img in imgs]
        imgs = torch.cat(imgs, 0)

    text = []
    if self.prod_Text:
        text = self.text_list[vid][min(vframe + self.text_delay, len(self.text_list[vid])):min(vframe + self.text_window + self.text_delay, len(self.text_list[vid]))]
        text = '\n'.join(text)

    gt = self.gt_list[vid][vframe]
    if len(text) == 0:
        text = ' '
    #text = text_util.lineToTensor(text)
    return imgs, text, gt
def play_video_specific_frames_matplotlib(video_path, seconds, caption=''):
    """
    Play video. Show only frames at given seconds.
    """
    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = float(cap.fps)
    speed = 1
    sec_idx = -1
    n_secs = len(seconds)
    window_name = 'Window_Title'

    plt.figure(window_name)
    plt.ion()
    plt.axis('off')

    global is_exit
    global is_playing
    is_exit = False
    is_playing = True

    def __key_press(event):
        event_key = event.key
        if event_key == 'escape':
            global is_exit
            is_exit = True
        elif event_key == ' ':
            global is_playing
            is_playing = not is_playing

    fig = plt.gcf()
    fig.canvas.mpl_connect('key_press_event', __key_press)

    while True:
        if is_exit:
            break
        if is_playing:
            sec_idx += 1
            # finish condition
            if sec_idx >= n_secs:
                break
            second = seconds[sec_idx]
            frame = cap.get_frame(second)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape

            # resize the frame
            f_width = 800
            resize_factor = float(f_width) / frame_size[1]
            f_height = int(frame_size[0] * resize_factor)
            frame_size = (f_width, f_height)
            frame = cv2.resize(src=frame, dsize=frame_size, interpolation=cv2.INTER_AREA)

            # write caption
            plt.title(caption)

            # show the frame
            frame = frame[:, :, (2, 1, 0)]
            plt.imshow(frame)

        # in both cases, pause the figure to capture key presses
        plt.pause(0.01)

    plt.close()
def play_video_specific_frames(video_path, seconds, caption=''):
    """
    Play video. Show only frames at given seconds.
    """
    is_playing = True
    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    fps = float(cap.fps)
    speed = 1
    sec_idx = -1
    n_secs = len(seconds)
    window_name = 'Window_Title'

    while True:
        if is_playing:
            sec_idx += 1
            # finish condition
            if sec_idx >= n_secs:
                break
            second = seconds[sec_idx]
            frame = cap.get_frame(second)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_size = frame.shape

            # resize the frame
            f_width = 800
            resize_factor = float(f_width) / frame_size[1]
            f_height = int(frame_size[0] * resize_factor)
            frame_size = (f_width, f_height)
            frame = cv2.resize(src=frame, dsize=frame_size, interpolation=cv2.INTER_AREA)

            # write caption on frame
            top = int(f_height * 0.9)
            text_width = cv2.getTextSize(caption, font, 1.2, 1)[0][0] + 20
            cv2.rectangle(frame, (0, top - 22), (text_width, top + 10), black_color, -1)
            cv2.putText(img=frame, text=caption, org=(10, top), fontFace=font, fontScale=1.2, color=white_color, thickness=1, lineType=8)

            # show the frame
            cv2.imshow(window_name, frame)
            e = cv2.waitKey(1)
            if e == 27:
                break
            if e == 32:
                is_playing = False
                print('Pause video')
        else:
            # toggle pause with 'space'
            e = cv2.waitKey(2)
            if e == 32:
                is_playing = True
                print('Play video')
    # end

    if True:
        tensorInputFirst.cpu()
        tensorInputSecond.cpu()
        tensorOutput.cpu()
    # end
#end

tensorOutput = torch.FloatTensor()

if arguments_strVideo and arguments_strVideoOut:
    # Process video
    reader = FFMPEG_VideoReader(arguments_strVideo, False)
    writer = FFMPEG_VideoWriter(arguments_strVideoOut, reader.size, reader.fps * 2)
    reader.initialize()
    nextFrame = reader.read_frame()
    for x in range(0, reader.nframes):
        firstFrame = nextFrame
        nextFrame = reader.read_frame()
        tensorInputFirst = torch.FloatTensor(numpy.rollaxis(firstFrame[:, :, ::-1], 2, 0) / 255.0)
        tensorInputSecond = torch.FloatTensor(numpy.rollaxis(nextFrame[:, :, ::-1], 2, 0) / 255.0)
        process(tensorInputFirst, tensorInputSecond, tensorOutput)
        # write the original frame, then the interpolated frame, doubling the fps
        writer.write_frame(firstFrame)
        writer.write_frame((numpy.rollaxis(tensorOutput.clamp(0.0, 1.0).numpy(), 0, 3)[:, :, ::-1] * 255.0).astype(numpy.uint8))
    # end
    writer.write_frame(nextFrame)
    writer.close()
else:
    # Process image
    tensorInputFirst = torch.FloatTensor(numpy.rollaxis(numpy.asarray(PIL.Image.open(arguments_strFirst))[:, :, ::-1], 2, 0).astype(numpy.float32) / 255.0)
#!/usr/bin/python
import sys
import os
import numpy as np
import cv2
import datetime
from moviepy.video.io.ffmpeg_reader import FFMPEG_VideoReader

#sys.argv = ['', '359_CTMxCTF_6.mp4', 2]

cap = cv2.VideoCapture(sys.argv[1])
rec = FFMPEG_VideoReader(sys.argv[1], True)
rec.initialize()

frameWidth = int(cap.get(3))   # CAP_PROP_FRAME_WIDTH
frameHeight = int(cap.get(4))  # CAP_PROP_FRAME_HEIGHT
frameNum = int(cap.get(7))     # CAP_PROP_FRAME_COUNT
fps = int(cap.get(5))          # CAP_PROP_FPS

fgbg = cv2.createBackgroundSubtractorMOG2(detectShadows=False)
FrameDiff = np.array([])
lapse = int(sys.argv[2])

i = 0
while (i < frameNum):
    ret, frame = cap.read()