# 예제 #1 (Example #1)
# 0
def extractSlides(videoPath):
    """Extract distinct slide images from a lecture/presentation video.

    Samples roughly one frame per second, tracks the perceptual (average)
    hash of consecutive samples, and captures a frame as a slide once the
    picture has changed (hash difference above DIFF_THRESHOLD) and then
    stabilized again (difference back below the threshold).

    Args:
        videoPath: pathlib.Path to the video file.

    Returns:
        list of PIL.Image.Image slide frames, in order of appearance.
    """
    print(f"Reading {videoPath.as_posix()}...")

    vr = VideoReader(videoPath.as_posix(), ctx=cpu(0))
    fps = vr.get_avg_fps()
    print(f"Successfully read. FPS: {fps}")

    slides = []
    prevImageHash = None   # hash of the previously sampled frame
    imageChanged = False   # True while a transition is in progress

    # Step by ~1 second worth of frames.
    for i in trange(0, len(vr), int(fps)):
        pilImage = Image.fromarray(vr[i].asnumpy())
        currentImageHash = imagehash.average_hash(pilImage)
        # First sample has no predecessor: compare it against itself
        # (diff 0).  Explicit None check — ImageHash truthiness is not a
        # reliable "unset" test.
        if prevImageHash is None:
            prevImageHash = currentImageHash
        imageDiff = currentImageHash - prevImageHash

        # Transition finished: the picture settled after a change.
        if imageChanged and imageDiff < DIFF_THRESHOLD:
            slides.append(pilImage)
            imageChanged = False

        # Large difference: a transition (slide change/animation) started.
        if imageDiff > DIFF_THRESHOLD:
            imageChanged = True

        prevImageHash = currentImageHash

    return slides
def extract_frames_from_video(video_file, video_id, target_dir):
    """Sample one frame per second from *video_file*, score each frame,
    and keep the interesting ones.

    For every sampled frame not already on disk, runs object detection and
    captioning, computes a relevance score, and — when the score exceeds
    2 — writes the frame as a JPEG into *target_dir* and records one
    metadata row:

        [video_id, path_to_frame, frame_index, avg_fps,
         yolo3_classes, caption, score]

    Args:
        video_file: path to the input video file.
        video_id: identifier stored with every row.
        target_dir: existing directory receiving "<frame index>.jpg" files.

    Returns:
        list of metadata rows, one list per kept frame.
    """
    results = []

    # A file-like object works as well, for in-memory decoding.
    with open(video_file, 'rb') as f:
        vr = VideoReader(f, ctx=cpu(0))
        print('video frames:', len(vr))
        avg_fps = int(vr.get_avg_fps())
        print('get_avg_fps=', vr.get_avg_fps())

        # The video reader handles seeking and skipping efficiently;
        # sample one frame per (integer) second.
        for i in range(0, len(vr), avg_fps):
            save_path = os.path.join(target_dir, "{:010d}.jpg".format(i))
            if os.path.exists(save_path):
                continue  # already processed on a previous run

            frame = vr[i]
            print(frame.shape)
            # decord frames are RGB; OpenCV expects BGR.
            img = cv2.cvtColor(frame.asnumpy(), cv2.COLOR_RGB2BGR)
            detected_classes_list = detect_objects_single_image(img)
            words = get_caption_single_image(img)
            score = get_score(words, detected_classes_list)
            if score > 2:
                cv2.imwrite(save_path, img)
                # BUG FIX: append the row as a unit.  The original used
                # results.extend(frame_result), which flattened every
                # row's fields into one long list and lost row boundaries.
                results.append([
                    video_id,
                    save_path,
                    i,
                    avg_fps,
                    detected_classes_list,
                    words,
                    score,
                ])
    return results
# 예제 #3 (Example #3)
# 0
import cv2
import os
from decord import VideoReader
from decord import cpu, gpu
# NOTE(review): removed unresolved merge conflict marker ('<<<<<<< HEAD');
# confirm the intended branch content was kept.
import easyocr

# OCR every 5 seconds of "vim.mp4" and append each recognized word to
# text.txt, with its timestamp (in whole seconds) on the matching line of
# time.txt.  Temporary frame JPEGs are written under tmp/ and removed.
reader = easyocr.Reader(['en'])
video = VideoReader("vim.mp4")
print(len(video))
fps = int(round(video.get_avg_fps()))

# BUG FIX: for videos shorter than 100 frames, len(video) // 100 is 0 and
# the progress computation below divided by zero.
percent_step = max(1, len(video) // 100)
q = 0

# Context managers so both output files are flushed and closed even if
# OCR raises part-way through (the original never closed them).  The
# handle is named time_file to avoid shadowing the stdlib `time` module.
with open('text.txt', 'w') as text, open('time.txt', 'w') as time_file:
    for current_frame in range(0, len(video), 5 * fps):
        # Rough progress indicator: fires ~once per percent of the video.
        if current_frame % percent_step < 5 * fps:
            print(str(q) + '%', current_frame)
            q += 1
        image = video[current_frame].asnumpy()
        name = 'tmp/' + str(current_frame) + '.jpg'
        cv2.imwrite(name, image)
        # readtext items are (bbox, text, confidence); keep text and
        # confidence, then drop low-confidence (<= 0.5) detections.
        result = [[x[1], x[2]] for x in reader.readtext(f'{name}')]
        result = list(filter(lambda x: x[1] > 0.5, result))
        for entry in result:
            for word in entry[0].split():
                text.write(word + '\n')
                time_file.write(str(current_frame // fps) + '\n')
        os.remove(name)
# 예제 #4 (Example #4)
# 0
def process_frames(original_filename, output_filename, mask_filename,
                   thumbnail_filename):
    """Key out each frame's background with a U^2-Net mask.

    Reads *original_filename* frame by frame, runs u2net to get a
    foreground mask, composites the foreground over a solid green keying
    background, and writes two lossless FFV1 .mkv files (keyed video and
    320x320 mask video).  Afterwards spawns external processes to encode
    both to h264 and to produce a thumbnail.

    Args:
        original_filename: path of the input video.
        output_filename: basename for the keyed output (".lossless.mkv"
            is appended for the intermediate file).
        mask_filename: basename for the mask output video.
        thumbnail_filename: destination of the generated thumbnail.

    Raises:
        AssertionError: if any encoder/thumbnail subprocess exits non-zero.
    """
    # change to gpu(0) for faster processing
    vr = VideoReader(original_filename, ctx=cpu(0))

    height, width, layers = vr[0].shape
    print(f'\u001b[33mInput frame {height}x{width}x{layers}\u001b[0m')

    fourcc = cv2.VideoWriter_fourcc(*'FFV1')
    video = cv2.VideoWriter(output_filename + '.lossless.mkv', fourcc,
                            vr.get_avg_fps(), (width, height))
    # u2net masks appear to be 320x320, hence the fixed mask-video size.
    video_mask = cv2.VideoWriter(mask_filename + '.lossless.mkv', fourcc,
                                 vr.get_avg_fps(), (320, 320))

    # solid color image (green screen)
    keying_bg = create_keying_background(width, height, (0, 255, 0))

    try:
        for frame in vr:
            # convert to numpy format
            frame_np = frame.asnumpy()

            # run u2net
            mask_np = u2net.run(frame_np)

            # write frame to mask video (replicate the single channel to 3)
            mask_np_uint8 = (mask_np * 255).astype(np.uint8)
            mask_np_bgr = np.stack([mask_np_uint8] * 3,
                                   axis=-1)  # https://stackoverflow.com/a/40119878
            video_mask.write(mask_np_bgr)

            # resize u2net output (320x320) to original frame resolution
            mask_cv2 = cv2.resize(mask_np, (width, height))

            # scale mask values from the range [0, 1] to [0, 255]
            mask_cv2_uint8 = (mask_cv2 * 255).astype(np.uint8)

            # thresholding the mask to have clear outlines
            ret, mask_cv2_uint8 = cv2.threshold(mask_cv2_uint8, 10, 255,
                                                cv2.THRESH_BINARY)

            # compute inverse mask
            mask_cv2_uint8_inv = cv2.bitwise_not(mask_cv2_uint8)

            # apply mask to image and merge with keying background
            frame_fg = cv2.bitwise_and(frame_np, frame_np,
                                       mask=mask_cv2_uint8)
            frame_bg = cv2.bitwise_and(keying_bg,
                                       keying_bg,
                                       mask=mask_cv2_uint8_inv)
            output_cv2 = frame_fg + frame_bg

            # decord frames are RGB; OpenCV writers expect BGR
            output_cv2 = cv2.cvtColor(output_cv2, cv2.COLOR_RGB2BGR)

            video.write(output_cv2)
    finally:
        # BUG FIX: release the writers even when u2net/cv2 raises
        # mid-stream; the original leaked both writers on error, leaving
        # the .mkv files truncated and still open.
        cv2.destroyAllWindows()
        video.release()
        video_mask.release()

    # encode videos to h264 (subprocesses run concurrently, then joined)
    thumbnail_proc = start_thumbnail(output_filename + '.lossless.mkv',
                                     thumbnail_filename)
    video_enc_proc = start_encode_video(output_filename + '.lossless.mkv',
                                        output_filename)
    video_mask_enc_proc = start_encode_video(mask_filename + '.lossless.mkv',
                                             mask_filename)
    assert thumbnail_proc.wait() == 0, 'Thumbnail encoding failed'
    assert video_enc_proc.wait() == 0, 'Video encoding failed'
    assert video_mask_enc_proc.wait() == 0, 'Mask video encoding failed'
# 예제 #5 (Example #5)
# 0
    scene_files = glob(os.path.join(CURRENT_DOWNLOAD_PATH, "*Scene*.mp4"))
    for path in scene_files:
        scene_to_json(path, outfolder=CURRENT_DOWNLOAD_PATH)

    # parsing the scenes
    processed_files = glob(os.path.join(CURRENT_DOWNLOAD_PATH, "*Scene*.json"))
    for file_path in tqdm(processed_files, disable=False):
        features = json.load(open(file_path, "r"))
        scene_path = os.path.join(
            f"{CURRENT_DOWNLOAD_PATH}",
            os.path.basename(file_path).split(".")[0] + ".mp4",
        )
        # check if the scene in present
        if scene_path in scene_files:
            scene = VideoReader(scene_path, ctx=cpu(0))
            fps = scene.get_avg_fps()
            # extracting all the face utterances from this scene
            # taking empty frames as the breakpoint for smaller scenes
            indices = [
                i for i, _ in enumerate(features["features"]) if _ == {
                    "shape": [],
                    "lips": [],
                    "rects": []
                }
            ]
            indices = [0] + indices + [len(scene)]
            breakpoints = [(indices[i], indices[i + 1])
                           for i in range(0,
                                          len(indices) - 1)]
            breakpoints = [
                _breakpoint for _breakpoint in breakpoints if _breakpoint[0] +