def extractSlides(videoPath):
    """Extract distinct slide frames from a video.

    Samples roughly one frame per second and uses perceptual (average)
    hashing to detect when the on-screen content changes and then settles
    again; the settled frame is recorded as a slide.

    Args:
        videoPath: path-like object (has ``.as_posix()``) for the video file.

    Returns:
        list of PIL images, one per detected slide transition.
    """
    print(f"Reading {videoPath.as_posix()}...")
    vr = VideoReader(videoPath.as_posix(), ctx=cpu(0))
    fps = vr.get_avg_fps()
    print(f"Successfully read. FPS: {fps}")

    slides = []
    prevImageHash = None
    imageChanged = False

    # Step by int(fps) frames => ~1 sampled frame per second of video.
    for i in trange(0, len(vr), int(fps)):
        pilImage = Image.fromarray(vr[i].asnumpy())
        # FIX: compute the hash once per frame and carry it over explicitly.
        # The original re-read `currentImageHash` from the *previous* loop
        # iteration before assigning it (working only by accident of
        # Python's scoping) and hashed the first frame twice.
        currentImageHash = imagehash.average_hash(pilImage)
        if prevImageHash is None:
            # First sampled frame: compare against itself (diff == 0).
            prevImageHash = currentImageHash
        imageDiff = currentImageHash - prevImageHash

        # A slide is captured once the picture has changed and then settled
        # back below the threshold (i.e. the transition has finished).
        if imageChanged and imageDiff < DIFF_THRESHOLD:
            slides.append(pilImage)
            imageChanged = False
        if imageDiff > DIFF_THRESHOLD:
            imageChanged = True

        prevImageHash = currentImageHash
    return slides
def extract_frames_from_video(video_file, video_id, target_dir):
    """Sample ~1 frame per second from a video, score it, and keep the good ones.

    For each sampled frame, runs object detection and captioning, computes a
    score, and — when ``score > 2`` — writes the frame as a JPEG into
    *target_dir* and records a CSV-style row:
        video_id;path_to_frame;frame_index;avg_fps;yolo3_classes;caption;score

    Args:
        video_file: path of the video file on disk.
        video_id: identifier stored with each row.
        target_dir: existing directory that receives the extracted frames.

    Returns:
        list: one row (itself a list of 7 fields) per kept frame.
    """
    results = []
    # A file-like object works as well, for in-memory decoding.
    with open(video_file, 'rb') as f:
        vr = VideoReader(f, ctx=cpu(0))
    print('video frames:', len(vr))
    avg_fps = int(vr.get_avg_fps())
    print('get_avg_fps=', vr.get_avg_fps())

    # Step by avg_fps frames => roughly one sample per second; the video
    # reader handles seeking and skipping in the most efficient manner.
    for i in range(0, len(vr), avg_fps):
        save_path = os.path.join(target_dir, "{:010d}.jpg".format(i))
        if os.path.exists(save_path):
            continue  # already extracted on a previous run
        frame = vr[i]
        print(frame.shape)
        # decord returns RGB; OpenCV expects BGR.
        img = cv2.cvtColor(frame.asnumpy(), cv2.COLOR_RGB2BGR)
        detected_classes_list = detect_objects_single_image(img)
        words = get_caption_single_image(img)
        score = get_score(words, detected_classes_list)
        if score > 2:
            # FIX: append the row as a unit. The original used
            # results.extend(frame_result), which flattened every row's 7
            # fields into one long list, losing the per-frame structure
            # described in the docstring.
            results.append([
                video_id,
                save_path,
                i,
                avg_fps,
                detected_classes_list,
                words,
                score,
            ])
            cv2.imwrite(save_path, img)
    return results
import cv2
import os
from decord import VideoReader
from decord import cpu, gpu

# NOTE(review): a leftover git merge-conflict marker ("<<<<<<< HEAD") was
# removed here — it made the module unparseable.
import easyocr

reader = easyocr.Reader(['en'])

video = VideoReader("vim.mp4")
print(len(video))

fps = int(round(video.get_avg_fps()))
q = 0
text = open('text.txt', 'w')
time = open('time.txt', 'w')

# Sample one frame every 5 seconds, OCR it, and log each recognised word
# together with its timestamp (in whole seconds).
for current_frame in range(0, len(video), 5 * fps):
    # Crude progress indicator, printed roughly once per 1% of the video.
    if current_frame % (len(video) // 100) < 5 * fps:
        print(str(q) + '%', current_frame)
        q += 1
    image = video[current_frame].asnumpy()
    name = 'tmp/' + str(current_frame) + '.jpg'
    cv2.imwrite(name, image)
    # readtext() yields (bbox, text, confidence); keep text/confidence pairs
    # with confidence above 0.5.
    result = [[x[1], x[2]] for x in reader.readtext(f'{name}')]
    result = list(filter(lambda x: x[1] > 0.5, result))
    for i in result:
        for x in i[0].split():
            text.write(x + '\n')
            time.write(str(current_frame // fps) + '\n')
    os.remove(name)

# FIX: flush and close the output files (the original leaked both handles).
text.close()
time.close()
def process_frames(original_filename, output_filename, mask_filename, thumbnail_filename):
    """Chroma-key every frame of a video using a U2-Net segmentation mask.

    Produces two lossless FFV1 ``.mkv`` intermediates — the keyed video
    (foreground composited over a solid green background) and the raw mask
    video — then launches external processes that encode both to h264 and
    build a thumbnail, asserting that all three succeed.

    Args:
        original_filename: path of the input video.
        output_filename: base path for the keyed output
            (``.lossless.mkv`` is appended for the intermediate).
        mask_filename: base path for the mask video intermediate.
        thumbnail_filename: path for the generated thumbnail.
    """
    # change to gpu(0) for faster processing
    vr = VideoReader(original_filename, ctx=cpu(0))
    height, width, layers = vr[0].shape
    print(f'\u001b[33mInput frame {height}x{width}x{layers}\u001b[0m')

    fourcc = cv2.VideoWriter_fourcc(*'FFV1')
    video = cv2.VideoWriter(output_filename + '.lossless.mkv', fourcc,
                            vr.get_avg_fps(), (width, height))
    # NOTE(review): assumes u2net.run() returns a 320x320 float mask in
    # [0, 1] — confirm against the u2net module.
    video_mask = cv2.VideoWriter(mask_filename + '.lossless.mkv', fourcc,
                                 vr.get_avg_fps(), (320, 320))

    # solid color image used as the keying background
    keying_bg = create_keying_background(width, height, (0, 255, 0))

    # FIX: release the writers even if a frame raises, so partially written
    # files are finalized correctly (the original leaked both on error).
    try:
        for frame in vr:
            # convert to numpy format and run u2net
            frame_np = frame.asnumpy()
            mask_np = u2net.run(frame_np)

            # write the raw mask frame: scale [0, 1] -> uint8 and replicate
            # to 3 channels (https://stackoverflow.com/a/40119878)
            mask_np_uint8 = (mask_np * 255).astype(np.uint8)
            video_mask.write(np.stack([mask_np_uint8] * 3, axis=-1))

            # resize the u2net output (320x320) to the frame resolution,
            # scale to [0, 255], and threshold for crisp outlines
            mask_cv2 = cv2.resize(mask_np, (width, height))
            mask_cv2_uint8 = (mask_cv2 * 255).astype(np.uint8)
            ret, mask_cv2_uint8 = cv2.threshold(mask_cv2_uint8, 10, 255,
                                                cv2.THRESH_BINARY)
            mask_cv2_uint8_inv = cv2.bitwise_not(mask_cv2_uint8)

            # composite: masked foreground + inversely-masked key color
            frame_fg = cv2.bitwise_and(frame_np, frame_np, mask=mask_cv2_uint8)
            frame_bg = cv2.bitwise_and(keying_bg, keying_bg,
                                       mask=mask_cv2_uint8_inv)
            output_cv2 = frame_fg + frame_bg

            # convert the color space back to BGR for the writer
            output_cv2 = cv2.cvtColor(output_cv2, cv2.COLOR_RGB2BGR)
            video.write(output_cv2)
    finally:
        cv2.destroyAllWindows()
        video.release()
        video_mask.release()

    # encode videos to h264 — the three subprocesses run concurrently,
    # then we wait for each and assert success.
    thumbnail_proc = start_thumbnail(output_filename + '.lossless.mkv',
                                     thumbnail_filename)
    video_enc_proc = start_encode_video(output_filename + '.lossless.mkv',
                                        output_filename)
    video_mask_enc_proc = start_encode_video(mask_filename + '.lossless.mkv',
                                             mask_filename)
    assert thumbnail_proc.wait() == 0, 'Thumbnail encoding failed'
    assert video_enc_proc.wait() == 0, 'Video encoding failed'
    assert video_mask_enc_proc.wait() == 0, 'Mask video encoding failed'
scene_files = glob(os.path.join(CURRENT_DOWNLOAD_PATH, "*Scene*.mp4")) for path in scene_files: scene_to_json(path, outfolder=CURRENT_DOWNLOAD_PATH) # parsing the scenes processed_files = glob(os.path.join(CURRENT_DOWNLOAD_PATH, "*Scene*.json")) for file_path in tqdm(processed_files, disable=False): features = json.load(open(file_path, "r")) scene_path = os.path.join( f"{CURRENT_DOWNLOAD_PATH}", os.path.basename(file_path).split(".")[0] + ".mp4", ) # check if the scene in present if scene_path in scene_files: scene = VideoReader(scene_path, ctx=cpu(0)) fps = scene.get_avg_fps() # extracting all the face utterances from this scene # taking empty frames as the breakpoint for smaller scenes indices = [ i for i, _ in enumerate(features["features"]) if _ == { "shape": [], "lips": [], "rects": [] } ] indices = [0] + indices + [len(scene)] breakpoints = [(indices[i], indices[i + 1]) for i in range(0, len(indices) - 1)] breakpoints = [ _breakpoint for _breakpoint in breakpoints if _breakpoint[0] +