Example #1
File: tasks.py Project: pietelite/pegasus
def _compile_worker(session_key: str, video_id: str) -> None:
    # Per-video config; drives the optional watermark position, audio trim and extra effects below
    config = get_nosql_handler().get_video_config(video_id)

    session_clips = get_sql_handler().get_session_clips_by_session_key(session_key)
    for session_clip in session_clips:
        download_session_clip(session_clip, sync=True)
    session_audios = get_sql_handler().get_session_audio_by_session_key(session_key)
    session_audio = None
    if session_audios:
        session_audio = session_audios[0]
        download_session_audio(session_audio, sync=True)

    # Create VideoFileClips
    clips = [VideoFileClip(session_clip.local_file_path()) for session_clip in session_clips]
    total_duration = sum([clip.duration for clip in clips])

    # Make all clips the same size
    final_w = min([clip.w for clip in clips])
    final_h = min([clip.h for clip in clips])
    clips = [resize(clip, newsize=(final_w, final_h)) for clip in clips]

    # Adding gamertag and logo to the video
    # gamertag = config.get('gamertag', '')
    # gamertag_position = config.get('gamertag_position', ['right', 'bottom'])
    #
    # if gamertag != '':
    #     gamertag_clip = TextClip(txt='@'+gamertag, fontsize=50, font = 'Comfortaa', color='white')
    #     gamertag_clip = gamertag_clip.set_duration(final.duration)\
    #                         .margin(right=8,top = 8, left=8, bottom=8, opacity=0)\
    #                         .set_position((gamertag_position[0], gamertag_position[1]))

    # === WATERMARK ===
    logo_position = config.get('logo_position', ['left', 'top'])
    logo_clip = ImageClip('./reels/static/reels/reels-logo-white.png')
    logo_clip = resize(logo_clip, height=final_h / 5)
    try:
        logo_x = (0 if logo_position[0] == 'left' else final_w - logo_clip.w)
        logo_y = (0 if logo_position[1] == 'top' else final_h - logo_clip.h)
    except (IndexError, KeyError, TypeError):
        logo_x, logo_y = 0, final_h - logo_clip.h
    logo_clip = logo_clip.set_pos((logo_x, logo_y))
    clips = [CompositeVideoClip([clip, logo_clip.set_duration(clip.duration)]) for clip in clips]

    # Concatenate clips
    final = concatenate_videoclips(clips, method="compose")

    # Add audio, only if there is audio
    audio_clip = None
    if session_audio:
        audio_clip = AudioFileClip(session_audio.local_file_path())
        audio_clip = audio_clip \
            .set_start(config.get('audio_start', 0)) \
            .set_end(config.get('audio_end', audio_clip.duration))
        # Attach audio to video, but make it only as long as the videos are
        # TODO: Manage case where videos are longer than audio clip
        final = final.set_audio(audio_clip.set_duration(final.duration))

    # If extra editing is enabled, do so
    if config.get('extras', False) and session_audio and get_file_type(session_audio.local_file_path()) == 'wav':
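        # The block below does rough onset detection on the WAV track: it takes the
        # left channel, averages it into 0.25 s chunks (4 values per second at
        # 48 kHz) and treats large jumps in the first difference as onsets. 't'
        # ends up holding onset times in seconds, spaced at least ~3 s apart, and
        # the loop below places a 1-second color-effect segment at each one.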
        fs, data = read(session_audio.local_file_path())
        data = data[:, 0]
        data = data[:len(data) - len(data) % 48000]
        data2 = np.mean(data.reshape(-1, int(48000 / 4)), axis=1)
        x = np.diff(data2, n=1)
        secs = np.where(x > 200)[0]
        t = list(secs[np.where(np.diff(secs) > 12)[0] + 1])
        if np.diff(secs)[0] > 12:
            t.insert(0, secs[0])
        for i in range(0, len(t)):
            t[i] /= 4
        for i in t:
            tfreeze = i
            if tfreeze + 1.75 >= final.duration:
                break
            clip_before = final.subclip(t_end=tfreeze)
            clip_after = final.subclip(t_start=tfreeze + 1)
            clip = final.subclip(t_start=tfreeze, t_end=tfreeze + 1)
            if int(i) % 2 == 0:
                clip = clip.fl_image(invert_colors).crossfadein(0.5).crossfadeout(0.5)
            else:
                clip = clip.fx(vfx.painting, saturation=1.6, black=0.006).crossfadein(0.5).crossfadeout(0.5)
            final = concatenate_videoclips([clip_before,
                                            clip,
                                            clip_after])

    # === Final Saving ===
    video = get_sql_handler().get_video(video_id)
    final.write_videofile(filename=video.local_file_path(),
                          verbose=True,
                          codec="libx264",
                          audio_codec='aac',
                          temp_audiofile=f'temp-audio-{video.video_id}.m4a',
                          remove_temp=True,
                          preset="medium",
                          ffmpeg_params=["-profile:v", "baseline", "-level", "3.0", "-pix_fmt", "yuv420p"])
    # close local files because we don't need them anymore and so they can be removed later
    for clip in clips:
        clip.close()

    if audio_clip:
        audio_clip.close()
    # upload to cold storage
    save_video(video, sync=True, clean=False)
Example #2
def test_withoutaudio():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0.2, 0.6)
    new_clip = clip.without_audio()
    assert new_clip.audio is None
    close_all_clips(locals())
Example #3
def test_write_gif_imageio():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0.2, 0.8)
    location = os.path.join(TMP_DIR, "imageio_gif.gif")
    clip.write_gif(location, program="imageio")
    assert os.path.isfile(location)
    close_all_clips(locals())
Example #4
"""
    Main file which takes a video as input and cuts the ad sections out.
"""
import argparse
import re

from moviepy.video.io.VideoFileClip import VideoFileClip

ap = argparse.ArgumentParser()
ap.add_argument('-v', '--video', required=True)
ap.add_argument('-c', '--count', type=int, required=True)
ap.add_argument('-m', '--model', required=True)
ap.add_argument('-o', '--offset')
args = vars(ap.parse_args())

print(f'[STATUS] Cutting images every {args["count"]} seconds')

# Load video
video_f = VideoFileClip(args['video'])
image_data = []
multiplier = args['count']


def cut_images(start, end):
    for x in range(start, end + 1):
        video_f.save_frame(f'{TMP_PATH}/{str(x)}.jpg', t=x * multiplier)


total_frame_count = video_f.duration
image_cut_count = int(total_frame_count / multiplier)

num_threads = 8

for x in range(num_threads):
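    # The listing is truncated here. A minimal sketch of one plausible loop body
    # (an assumption, not taken from the original project; it also assumes
    # `import threading`): give each worker an even share of the frame indices
    # and extract its frames with cut_images().
    chunk = image_cut_count // num_threads
    start = x * chunk
    end = image_cut_count if x == num_threads - 1 else (x + 1) * chunk - 1
    threading.Thread(target=cut_images, args=(start, end)).start()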
Example #5
def test_write_gif_ffmpeg_pixel_format():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0.2, 0.4)
    location = os.path.join(TMP_DIR, "ffmpeg_gif.gif")
    clip.write_gif(location, program="ffmpeg", pixel_format="bgr24")
    assert os.path.isfile(location)
    close_all_clips(locals())
Example #6
def detect_on_video():
    video = VideoFileClip(IN_VIDEO_PATH)
    output_video = video.fl_image(process_video)
    output_video.write_videofile(OUT_VIDEO_PATH, audio=False)
Example #7
def process(text_file, video_file, align_file, label_file):
    with open(text_file, 'r') as f:
        content = f.read()

    # TODO: decide how to determine the speaker's gender from the text file (automate it, or annotate by hand?)
    time_pairs = get_alignments(align_file)
    lines = content.splitlines()
    times = []
    for line in lines:
        x = (line.split(': ')[0].strip().split(' ')[1])
        # print(x)
        # print(float(x.split('-')[0].split('[')[1]))
        times.append([
            float(x.split('-')[0].split('[')[1]),
            float(x.split('-')[1].split(']')[0])
        ])
    main_features = []
    with VideoFileClip(video_file) as video:
        for i in times:
            feats = []
            new = video.subclip(i[0], i[1])
            new.write_videofile('video.mp4', audio_codec='aac')
            with VideoFileClip('video.mp4') as sentence_video:
                for k in time_pairs:
                    word_video = sentence_video.subclip(k[0], k[1])
                    word_video.write_videofile('word_video.mp4', audio_codec='aac')
                    fImages = []
                    mImages = []
                    cap = cv2.VideoCapture('word_video.mp4')
                    hog_face_detector = dlib.get_frontal_face_detector()
                    while True:
                        ret, img = cap.read()
                        # print(img)
                        if isinstance(img, type(None)):

                            break
                        else:

                            height, width = img.shape[:2]

                            start_row, start_col = int(0), int(0)

                            end_row, end_col = int(height), int(width * 0.5)
                            cropped_top = img[start_row:end_row,
                                              start_col:end_col]

                            faces_hog = hog_face_detector(cropped_top, 1)

                            for face in faces_hog:
                                x = face.left()

                                y = face.top()

                                w = face.right() - x

                                h = face.bottom() - y

                                cropping = cropped_top[y:y + h, x:x + w]

                                cropping = cv2.resize(cropping, (125, 125))
                                # cv2_imshow(cropping)
                                fImages.append(cropping)
                            # cv2_imshow(cropped_top)

                            cv2.waitKey(0)
                            cv2.destroyAllWindows()

                            # Let's get the starting pixel coordiantes (top left of cropped bottom)
                            start_row, start_col = int(0), int(width * 0.5)
                            # Let's get the ending pixel coordinates (bottom right of cropped bottom)
                            end_row, end_col = int(height), int(width)
                            cropped_bot = img[start_row:end_row, start_col:end_col]
                            # print(start_row, end_row )
                            # print(start_col, end_col)
                            mImages.append(cropped_bot)
                            # cv2_imshow( cropped_bot)
                            cv2.waitKey(0)
                            cv2.destroyAllWindows()
Example #8
def extract_audio(input_video_path, input_audio_path='./data/test.wav'):
    video = VideoFileClip(input_video_path)
    audio = video.audio
    audio.write_audiofile(input_audio_path, codec='pcm_s16le')
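# A minimal usage sketch (the input path below is a placeholder, not from the source):
# extract_audio('./data/test_video.mp4', './data/test.wav')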
Example #9
def get_test_video():
    return VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0, 1)
Example #10
def align_videos(subject, date):
    frame_buffer = 10

    base_video_path=os.path.join(cfg['preprocessed_data_dir'],subject, date,'video')
    with open(os.path.join(base_video_path,'config.json')) as json_file:
        vid_cfg=json.load(json_file)
    fnames=sorted(glob.glob(os.path.join(base_video_path, 'front', '%s*.avi' % date)))

    rois_checked={
        'front':False,
        'side':False,
        'top':False
    }
    crop_checked={
        'front':False,
        'side':False,
        'top':False
    }

    # For each file (filenames are same in each view directory)
    for fname in fnames:
        fname=os.path.split(fname)[-1]
        print('Processing %s' % fname)
        blue_onsets={}
        yellow_onsets={}
        blue_ts={}
        yellow_ts={}
        video_nframes={}

        # Whether or not to use LED for alignment
        led_based = True

        for view in cfg['camera_views']:
            video_path=os.path.join(base_video_path, view)
            clip = VideoFileClip(os.path.join(video_path, fname))
            n_frames_approx = int(np.ceil(clip.duration * clip.fps) + frame_buffer)
            n_frames = n_frames_approx
            clip.reader.initialize()

            # Initialize LED time series for this view
            blue_ts[view]=[]
            yellow_ts[view]=[]

            for index in range(n_frames_approx):
                image = img_as_ubyte(clip.reader.read_frame())

                # If not already set, show GUI to select blue LED ROI
                if not rois_checked[view]:
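                    # ROIs in vid_cfg are stored in full-frame pixel coordinates:
                    # any previously saved ROI is shifted into the cropped search
                    # area before being shown, and the offsets are added back after
                    # the user makes a selection.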
                    blue_led_roi_area=vid_cfg['blue_led_roi_areas'][view]
                    blue_cropped_img=image[blue_led_roi_area[2]:blue_led_roi_area[3],
                                     blue_led_roi_area[0]:blue_led_roi_area[1],:]
                    init_roi=None
                    if  view in vid_cfg['blue_led_rois'] and vid_cfg['blue_led_rois'][view] is not None:
                        init_roi=vid_cfg['blue_led_rois'][view]
                        init_roi[0] = init_roi[0] - blue_led_roi_area[0]
                        init_roi[1] = init_roi[1] - blue_led_roi_area[0]
                        init_roi[2] = init_roi[2] - blue_led_roi_area[2]
                        init_roi[3] = init_roi[3] - blue_led_roi_area[2]
                    vid_cfg['blue_led_rois'][view] = select_crop_parameters.show(blue_cropped_img, 'Select blue LED ROI', init_coords=init_roi)
                    vid_cfg['blue_led_rois'][view][0] = vid_cfg['blue_led_rois'][view][0] + blue_led_roi_area[0]
                    vid_cfg['blue_led_rois'][view][1] = vid_cfg['blue_led_rois'][view][1] + blue_led_roi_area[0]
                    vid_cfg['blue_led_rois'][view][2] = vid_cfg['blue_led_rois'][view][2] + blue_led_roi_area[2]
                    vid_cfg['blue_led_rois'][view][3] = vid_cfg['blue_led_rois'][view][3] + blue_led_roi_area[2]

                    yellow_led_roi_area = vid_cfg['yellow_led_roi_areas'][view]
                    yellow_cropped_img = image[yellow_led_roi_area[2]:yellow_led_roi_area[3],
                                       yellow_led_roi_area[0]:yellow_led_roi_area[1], :]
                    init_roi = None
                    if view in vid_cfg['yellow_led_rois'] and vid_cfg['yellow_led_rois'][view] is not None:
                        init_roi = vid_cfg['yellow_led_rois'][view]
                        init_roi[0] = init_roi[0] - yellow_led_roi_area[0]
                        init_roi[1] = init_roi[1] - yellow_led_roi_area[0]
                        init_roi[2] = init_roi[2] - yellow_led_roi_area[2]
                        init_roi[3] = init_roi[3] - yellow_led_roi_area[2]
                    vid_cfg['yellow_led_rois'][view] = select_crop_parameters.show(yellow_cropped_img, 'Select yellow LED ROI', init_coords=init_roi)
                    vid_cfg['yellow_led_rois'][view][0] = vid_cfg['yellow_led_rois'][view][0] + yellow_led_roi_area[0]
                    vid_cfg['yellow_led_rois'][view][1] = vid_cfg['yellow_led_rois'][view][1] + yellow_led_roi_area[0]
                    vid_cfg['yellow_led_rois'][view][2] = vid_cfg['yellow_led_rois'][view][2] + yellow_led_roi_area[2]
                    vid_cfg['yellow_led_rois'][view][3] = vid_cfg['yellow_led_rois'][view][3] + yellow_led_roi_area[2]

                    rois_checked[view]=True
                    
                if index == int(n_frames_approx - frame_buffer * 2):
                    last_image = image
                elif index > int(n_frames_approx - frame_buffer * 2):
                    if (image == last_image).all():
                        n_frames = index
                        break

                # Crop image around blue LED, get only blue channel
                blue_roi=vid_cfg['blue_led_rois'][view]
                blue_led_image = image[blue_roi[2]:blue_roi[3], blue_roi[0]:blue_roi[1], 2]
                # Add average of cropped image to blue LED timeseries
                blue_ts[view].append(np.mean(blue_led_image))

                # Crop image around yellow LED, average red and green channels
                yellow_roi=vid_cfg['yellow_led_rois'][view]
                yellow_led_image = np.mean(image[yellow_roi[2]:yellow_roi[3], yellow_roi[0]:yellow_roi[1], 0:2], axis=2)
                # Add average of cropped image to yellow LED timeseries
                yellow_ts[view].append(np.mean(yellow_led_image))

            blue_ts[view] = np.array(blue_ts[view])
            yellow_ts[view] = np.array(yellow_ts[view])

            # Normalize based on first 10 time steps
            if len(blue_ts[view])>10:
                blue_ts[view]=(blue_ts[view]-np.mean(blue_ts[view][0:10]))/np.mean(blue_ts[view][0:10])
            if len(yellow_ts[view])>10:
                yellow_ts[view]=(yellow_ts[view]-np.mean(yellow_ts[view][0:10]))/np.mean(yellow_ts[view][0:10])
            blue_ts[view]=blue_ts[view]/np.max(blue_ts[view])
            yellow_ts[view]=yellow_ts[view]/np.max(yellow_ts[view])

            # plt.figure()
            # plt.subplot(2,1,1)
            # plt.plot(video_blue_brightness)
            # plt.subplot(2, 1, 2)
            # plt.plot(video_yellow_brightness)
            # plt.show()

            # Get derivative of blue and yellow ts
            #blue_diff=np.diff(blue_ts[view])
            #yellow_diff=np.diff(yellow_ts[view])

            # Get peak blue and yellow LED change times
            #blue_peak=np.max(blue_diff)
            blue_peak = np.max(blue_ts[view])
            #yellow_peak=np.max(yellow_diff)
            yellow_peak = np.max(yellow_ts[view])

            # If none above 0.05, don't use LEDs for aligning
            #if blue_peak<.05 or yellow_peak<.05:
            if len(blue_ts[view])<10 or np.max(blue_ts[view][10:]) < .25 or np.max(yellow_ts[view])<.25:
                led_based=False
                print("Can't figure out LED onset - not using")
            # Otherwise, use the first time point after 25 time points where LED diff exceeds 0.05
            else:
                #blue_onsets[view] = np.where(blue_diff >= 0.05)[0][0]
                #np.where(blue_ts[view] >= 0.25)[0][0]
                blue_onsets[view] = 10 + np.where(blue_ts[view][10:] >= 0.25)[0][0]
                #yellow_onsets[view] = np.where(yellow_diff >= 0.05)[0][0]
                yellow_onsets[view] = np.where(yellow_ts[view] >= 0.25)[0][0]

            video_nframes[view]=n_frames

        # Use first view where blue LED exceeds threshold as reference to align to
        if len(blue_onsets.values())>0:
            min_blue_onset=min(blue_onsets.values())

        # if fname=='15-05-2019_10-34-15_11.avi':
        #     plt.figure()
        #     for view in cfg['camera_views']:
        #         plt.plot(blue_ts[view], label='%s: blue' % view)
        #         plt.plot(yellow_ts[view], label='%s: yellow' % view)
        #     plt.legend()
        #     plt.show()

        # Compute trial duration based on each view
        trial_durations={}
        for view in cfg['camera_views']:
            if view in blue_onsets and view in yellow_onsets:
                # Trial duration (in ms)
                trial_duration=(yellow_onsets[view]-blue_onsets[view])*1.0/clip.fps*1000.0
                # there is an 850ms delay before blue LED comes on
                if trial_duration>0:
                    trial_duration=trial_duration+850.0
                trial_durations[view]=trial_duration
                print('%s: %.2fms' % (view, trial_duration))
        #assert(len(trial_durations)>0 and all(x == trial_durations[0] for x in trial_durations))

        start_frames_to_cut={}
        n_frames_after_cutting = {}
        # Cut frames to align videos and crop
        for idx,view in enumerate(cfg['camera_views']):

            # using LED to align
            if led_based:
                start_frames_to_cut[view]=blue_onsets[view]-min_blue_onset
            # otherwise - use standard # of frames to crop (order of video triggering is top, side, front)
            if not led_based or start_frames_to_cut[view]>5:
                start_frames_to_cut[view] = 0
                if view=='front':
                    start_frames_to_cut[view]=2
                elif view=='side':
                    start_frames_to_cut[view]=1
            n_frames_after_cutting[view]=video_nframes[view]-start_frames_to_cut[view]
        new_nframes=min(n_frames_after_cutting.values())

        intrinsic_files = {}
        for view in cfg['camera_views']:
            dlc3d_cfg = os.path.join('/data/tool_learning/preprocessed_data/dlc_projects',
                                     'visual_grasp_3d-Jimmy-2019-08-19-3d', 'config.yaml')

            cfg_3d = auxiliaryfunctions.read_config(dlc3d_cfg)
            img_path, path_corners, path_camera_matrix, path_undistort = auxiliaryfunctions_3d.Foldernames3Dproject(
                cfg_3d)
            path_intrinsic_file = os.path.join(path_camera_matrix, '%s_intrinsic_params.pickle' % view)
            intrinsic_file = auxiliaryfunctions.read_pickle(path_intrinsic_file)
            intrinsic_files[view] = intrinsic_file[view]

        for idx, view in enumerate(cfg['camera_views']):
            camera_matrix = intrinsic_files[view]['mtx']
            distortion_coefficients = intrinsic_files[view]['dist']

            end_frames_to_cut=n_frames_after_cutting[view]-new_nframes
            print('cutting %d frames from beginning and %d frames from end of %s' % (start_frames_to_cut[view], end_frames_to_cut, view))

            # Cut frames from blue and yellow LED time series and onsets
            if end_frames_to_cut>0:
                blue_ts[view]=blue_ts[view][start_frames_to_cut[view]:-end_frames_to_cut]
                yellow_ts[view] = yellow_ts[view][start_frames_to_cut[view]:-end_frames_to_cut]
            else:
                blue_ts[view] = blue_ts[view][start_frames_to_cut[view]:]
                yellow_ts[view] = yellow_ts[view][start_frames_to_cut[view]:]
            if view in blue_onsets:
                blue_onsets[view]=blue_onsets[view]-start_frames_to_cut[view]
            if view in yellow_onsets:
                yellow_onsets[view]=yellow_onsets[view]-start_frames_to_cut[view]

            # Load video and cut frames from beginning
            video_path = os.path.join(base_video_path, view)
            clip = VideoFileClip(os.path.join(video_path, fname))

            # Crop limits based on view
            frames=[]
            n_frames_approx = int(np.ceil(clip.duration * clip.fps)+frame_buffer)
            for index in range(n_frames_approx):
                image = img_as_ubyte(clip.reader.read_frame())
                image = cv2.undistort(image, camera_matrix, distortion_coefficients)
                if index>=start_frames_to_cut[view]:
                    if not crop_checked[view]:
                        init_crop_lims = None
                        if view in vid_cfg['crop_limits'] and vid_cfg['crop_limits'][view] is not None:
                            init_crop_lims = vid_cfg['crop_limits'][view]
                        vid_cfg['crop_limits'][view] = select_crop_parameters.show(image, 'Select crop limits',
                                                                                   init_coords=init_crop_lims)
                        crop_checked[view]=True
                    # Crop image and save to video
                    crop_lims=vid_cfg['crop_limits'][view]
                    image=image[crop_lims[2]:crop_lims[3], crop_lims[0]:crop_lims[1], :]
                    frames.append(image)
                if len(frames)==new_nframes:
                    break

            clip.close()

            # Check that have the right number of frames
            assert(len(frames)==new_nframes)

            # Create new video clip (cropped and aligned)
            video_path = os.path.join(base_video_path, view)
            new_clip = VideoProcessorCV(sname=os.path.join(video_path, fname), fps=clip.fps, codec='mp4v',
                                        sw=crop_lims[1] - crop_lims[0], sh=crop_lims[3] - crop_lims[2])
            for frame in frames:
                new_clip.save_frame(np.uint8(frame))
            new_clip.close()

        # Make everything hashable
        for view in cfg['camera_views']:
            blue_ts[view]=blue_ts[view].tolist()
            yellow_ts[view] = yellow_ts[view].tolist()
            if view in blue_onsets:
                blue_onsets[view]=int(blue_onsets[view])
            if view in yellow_onsets:
                yellow_onsets[view]=int(yellow_onsets[view])
            if view in trial_durations:
                trial_durations[view]=float(trial_durations[view])

        # Save video info to JSON
        data = {
            'blue_roi': vid_cfg['blue_led_rois'],
            'yellow_roi': vid_cfg['yellow_led_rois'],
            'blue_ts': blue_ts,
            'yellow_ts': yellow_ts,
            'blue_onset': blue_onsets,
            'yellow_onset': yellow_onsets,
            'trial_duration': trial_durations,
            'fname': fname
        }
        [base, ext] = os.path.splitext(fname)
        with open(os.path.join(base_video_path, '%s.json' % base), 'w') as outfile:
            json.dump(data, outfile)

        print('')

    with open(os.path.join(base_video_path,'config.json'),'w') as outfile:
        json.dump(vid_cfg, outfile)
Example #11
def combine_video(base_video_path, fnames, out_path, out_fname):
    # Output video size
    out_size = [2040, 1084]
    frame_buffer = 10

    # Process videos
    front_video = VideoFileClip(os.path.join(base_video_path, 'front', fnames['front']))
    side_video = VideoFileClip(os.path.join(base_video_path, 'side', fnames['side']))
    top_video = VideoFileClip(os.path.join(base_video_path, 'top', fnames['top']))
    if '3d' in fnames:
        video_3d=VideoFileClip(os.path.join(base_video_path, fnames['3d']))

    fps = front_video.fps
    # Create new clip and write frames
    new_clip = VideoProcessorCV(
        sname=os.path.join(out_path, out_fname),
        fps=fps, codec='mp4v', sw=out_size[0], sh=out_size[1])

    n_frames_approx = int(np.ceil(front_video.duration * front_video.fps) + frame_buffer)
    n_frames = n_frames_approx
    front_video.reader.initialize()
    side_video.reader.initialize()
    top_video.reader.initialize()
    if '3d' in fnames:
        video_3d.reader.initialize()

    for index in range(n_frames_approx):
        front_image = img_as_ubyte(front_video.reader.read_frame())
        if index == int(n_frames_approx - frame_buffer * 2):
            last_front_image = front_image
        elif index > int(n_frames_approx - frame_buffer * 2):
            if (front_image == last_front_image).all():
                n_frames = index
                break

        side_image = img_as_ubyte(side_video.reader.read_frame())
        if index == int(n_frames_approx - frame_buffer * 2):
            last_side_image = side_image
        elif index > int(n_frames_approx - frame_buffer * 2):
            if (side_image == last_side_image).all():
                n_frames = index
                break

        top_image = img_as_ubyte(top_video.reader.read_frame())
        if index == int(n_frames_approx - frame_buffer * 2):
            last_top_image = top_image
        elif index > int(n_frames_approx - frame_buffer * 2):
            if (top_image == last_top_image).all():
                n_frames = index
                break

        if '3d' in fnames:
            image_3d = img_as_ubyte(video_3d.reader.read_frame())
            if index == int(n_frames_approx - frame_buffer * 2):
                last_3d_image = image_3d
            elif index > int(n_frames_approx - frame_buffer * 2):
                if (image_3d == last_3d_image).all():
                    n_frames = index
                    break

        # Resize frame
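        # Each output frame is assembled as a 2x2 grid: top view top-left, side view
        # top-right, front view bottom-left and (when present) the 3d view bottom-right,
        # each rescaled to fit its quadrant of out_size.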
        front_factor = np.min([out_size[0] / 2.0 / front_image.shape[1], out_size[1] / 2.0 / front_image.shape[0]])
        front_image = cv2.resize(front_image, None, fx=front_factor, fy=front_factor)
        side_factor = np.min([out_size[0] / 2.0 / side_image.shape[1], out_size[1] / 2.0 / side_image.shape[0]])
        side_image = cv2.resize(side_image, None, fx=side_factor, fy=side_factor)
        top_factor = np.min([out_size[0] / 2.0 / top_image.shape[1], out_size[1] / 2.0 / top_image.shape[0]])
        top_image = cv2.resize(top_image, None, fx=top_factor, fy=top_factor)
        if '3d' in fnames:
            factor_3d = np.min([out_size[0] / 2.0 / image_3d.shape[1], out_size[1] / 2.0 / image_3d.shape[0]])
            image_3d = cv2.resize(image_3d, None, fx=factor_3d, fy=factor_3d)

        # Initialize new frame and add front image to it
        new_frame = np.zeros((out_size[1], out_size[0], 3))
        extra_x_space = out_size[0] / 2 - front_image.shape[1]
        extra_y_space = out_size[1] / 2 - front_image.shape[0]
        start_x=int(out_size[1] / 2 + extra_y_space / 2)
        end_x=int(out_size[1] / 2 + front_image.shape[0] + extra_y_space / 2)
        start_y=int(0 + extra_x_space / 2)
        end_y=int(front_image.shape[1] + extra_x_space / 2)
        new_frame[start_x:end_x,start_y:end_y, :] = front_image

        # Add side image to frame
        extra_x_space = out_size[0] / 2 - side_image.shape[1]
        extra_y_space = out_size[1] / 2 - side_image.shape[0]
        start_x=int(0 + extra_y_space / 2)
        end_x=int(side_image.shape[0] + extra_y_space / 2)
        start_y=int(out_size[0] / 2 + extra_x_space / 2)
        end_y=int(out_size[0] / 2 + side_image.shape[1] + extra_x_space / 2)
        new_frame[start_x:end_x,start_y:end_y,:] = side_image

        # Add top image to frame
        extra_x_space = out_size[0] / 2 - top_image.shape[1]
        extra_y_space = out_size[1] / 2 - top_image.shape[0]
        start_x=int(0 + extra_y_space / 2)
        end_x=int(top_image.shape[0] + extra_y_space / 2)
        start_y=int(0 + extra_x_space / 2)
        end_y=int(top_image.shape[1] + extra_x_space / 2)
        new_frame[start_x:end_x,start_y:end_y, :] = top_image

        if '3d' in fnames:
            extra_x_space = out_size[0] / 2 - image_3d.shape[1]
            extra_y_space = out_size[1] / 2 - image_3d.shape[0]
            start_x = int(out_size[1] / 2 + extra_y_space / 2)
            end_x = int(out_size[1] / 2 + image_3d.shape[0] + extra_y_space / 2)
            start_y = int(out_size[0] / 2 + extra_x_space / 2)
            end_y = int(out_size[0] / 2 + image_3d.shape[1] + extra_x_space / 2)
            new_frame[start_x:end_x, start_y:end_y, :] = image_3d


        new_clip.save_frame(np.uint8(new_frame))
    front_video.close()
    del front_video
    side_video.close()
    del side_video
    top_video.close()
    del top_video
    if '3d' in fnames:
        video_3d.close()
        del video_3d
    new_clip.close()
Example #12
input_delay = 0.35  # ++ if the music comes early, -- if it comes later
delay = 0.36 - input_delay  # by how many seconds the audio is shifted
position = 0.36


output_video_path = 'test.mp4'

clips = []
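
# Czech identifiers kept from the original source: 'pocet_klipu' is the number of
# clips, 'delka' a clip's length and 'zacatek' its start time; each subclip is cut
# so that the concatenated video stays in step with the detected beats.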



for i in range(pocet_klipu):

	delka = beats[i]+after_delay-position
	zacatek = shots[i]+after_delay-delka

	shot=zacatek+delka-after_delay
	print(delka)

	clips.append(VideoFileClip(paths[i]).subclip(zacatek,zacatek+delka))
	position+=delka


audioclip = AudioFileClip(song_path).subclip(delay,beats[pocet_klipu-1]+after_delay)



final_clip = concatenate_videoclips(clips).set_audio(audioclip)

final_clip.write_videofile(output_video_path, audio_codec='aac')
#final_clip.preview()
Example #13
from moviepy.video.io.VideoFileClip import VideoFileClip

in_vid_path = "C:\\Users\\Brandon\\Documents\\Personal_Projects\\vid_m_comp_big_data\\current_data\\downloaded_clips\\post_0010.mp4"
out_vid_path = "C:\\Users\\Brandon\\Documents\\Personal_Projects\\vid_m_comp_big_data\\vids\\post_0010__trimmed.mp4"
# time_tup = (10,17)

# input_video_path = 'myPath/vid1.mp4'
# output_video_path = 'myPath/output/vid1.mp4'

with VideoFileClip(in_vid_path) as video:
    new = video.subclip(10, 17)
    new.write_videofile(out_vid_path, audio_codec='aac')

#     trim_vid(in_vid_path, out_vid_path, time_tup)
Example #14
    def convert(self, video_file, swap_model = False, duration = None, start_time = None, use_gan = False, face_filter = False, photos = True, crop_x = None, width = None, side_by_side = False):
        # Magic incantation to not have tensorflow blow up with an out of memory error.
        import tensorflow as tf
        import keras.backend.tensorflow_backend as K
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list="0"
        K.set_session(tf.Session(config=config))

        # Load model
        model_name = "Original"
        converter_name = "Masked"
        if use_gan:
            model_name = "GAN"
            converter_name = "GAN"
        model = PluginLoader.get_model(model_name)(Path(self._model_path(use_gan)))
        if not model.load(swap_model):
            print('model Not Found! A valid model must be provided to continue!')
            exit(1)

        # Load converter
        converter = PluginLoader.get_converter(converter_name)
        converter = converter(model.converter(False),
                              blur_size=8,
                              seamless_clone=True,
                              mask_type="facehullandrect",
                              erosion_kernel_size=None,
                              smooth_mask=True,
                              avg_color_adjust=True)

        # Load face filter
        filter_person = self._person_a
        if swap_model:
            filter_person = self._person_b
        filter = FaceFilter(self._people[filter_person]['faces'])

        # Define conversion method per frame
        def _convert_frame(frame, convert_colors = True):
            if convert_colors:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # Swap RGB to BGR to work with OpenCV
            for face in detect_faces(frame, "cnn"):
                if (not face_filter) or (face_filter and filter.check(face)):
                    frame = converter.patch_image(frame, face)
                    frame = frame.astype(numpy.float32)
            if convert_colors:                    
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # Swap back to RGB for moviepy
            return frame
        def _convert_helper(get_frame, t):
            return _convert_frame(get_frame(t))

        media_path = self._video_path({ 'name' : video_file })
        if not photos:
            # Process video; start loading the video clip
            video = VideoFileClip(media_path)

            # If a duration is set, trim clip
            if duration:
                video = video.subclip(start_time, start_time + duration)
            
            # Resize clip before processing
            if width:
                video = video.resize(width = width)

            # Crop clip if desired
            if crop_x:
                video = video.fx(crop, x2 = video.w / 2)

            # Kick off convert frames for each frame
            new_video = video.fl(_convert_helper)

            # Stack clips side by side
            if side_by_side:
                def add_caption(caption, clip):
                    text = (TextClip(caption, font='Amiri-regular', color='white', fontsize=80).
                            margin(40).
                            set_duration(clip.duration).
                            on_color(color=(0,0,0), col_opacity=0.6))
                    return CompositeVideoClip([clip, text])
                video = add_caption("Original", video)
                new_video = add_caption("Swapped", new_video)                
                final_video = clips_array([[video], [new_video]])
            else:
                final_video = new_video

            # Resize clip after processing
            #final_video = final_video.resize(width = (480 * 2))

            # Write video
            output_path = os.path.join(self.OUTPUT_PATH, video_file)
            final_video.write_videofile(output_path, rewrite_audio = True)
            
            # Clean up
            del video
            del new_video
            del final_video
        else:
            # Process a directory of photos
            for face_file in os.listdir(media_path):
                face_path = os.path.join(media_path, face_file)
                image = cv2.imread(face_path)
                image = _convert_frame(image, convert_colors = False)
                cv2.imwrite(os.path.join(self.OUTPUT_PATH, face_file), image)
Example #15
# CinEditorML model Code
# Result Video Name: scenario2

from moviepy.editor import *
import numpy as np
from moviepy.video.tools.segmenting import findObjects
from moviepy.video.io.VideoFileClip import VideoFileClip
import os

### Video

clip1 = VideoFileClip('videoTest3.mp4')

### Text

_begin = TextClip("beginningText", fontsize=70, color='white')
_begin = _begin.set_position('center').set_duration(10)
begin = CompositeVideoClip([_begin], size=[1920, 1080])

### Specific Video Part

clip1a = clip1.subclip('00:23', '01:47')
clip1a.write_videofile("clip1a.mp4", fps=30)

### Specific Video Part

clip1b = clip1.subclip('02:01', '02:21')
clip1b.write_videofile("clip1b.mp4", fps=30)

### Text
Example #16
def download_hacs(root, annotations, checksums, workers=8, debug=False):
    logging.info(f'Downloading HACS videos.')
    videos = [
        v for v in annotations['videos'] if v['metadata']['dataset'] == 'HACS'
    ]

    if debug:
        # Take 5 of each type of video.
        _scene_videos = [
            v for v in videos if v['metadata']['scene'] is not None
        ]
        _noscene_videos = [v for v in videos if v['metadata']['scene'] is None]
        videos = _scene_videos[:5] + _noscene_videos[:5]

    videos_dir = root / 'videos'
    frames_dir = root / 'frames'
    tmp_dir = dir_path(root / 'cache' / 'hacs_videos')
    missing_dir = Path(root / 'hacs_missing')

    # List of (video, video_path, frame_path)
    videos_to_dump = []
    unavailable_videos = []
    for video in tqdm(videos, desc='Downloading HACS'):
        video_path = file_path(videos_dir / f"{video['name']}.mp4")
        frame_output = dir_path(frames_dir / video['name'])
        if are_tao_frames_dumped(frame_output,
                                 checksums[video['name']],
                                 warn=False):
            continue
        if not video_path.exists():
            ytid = video['metadata']['youtube_id']
            full_video = tmp_dir / f"v_{ytid}.mp4"
            missing_downloaded = missing_dir / f"{ytid}.mp4"
            if missing_downloaded.exists():
                logging.info(
                    f'Found video downloaded by user at {missing_downloaded}.')
                shutil.copy2(missing_downloaded, full_video)
            if not full_video.exists():
                url = 'http://youtu.be/' + ytid
                try:
                    vid_bytes = download_to_bytes(url)
                except BaseException:
                    vid_bytes = None
                if isinstance(vid_bytes, int) or vid_bytes is None:
                    unavailable_videos.append(
                        (ytid, video['metadata']['action']))
                    continue
                else:
                    vid_bytes = vid_bytes.getvalue()
                    if len(vid_bytes) == 0:
                        unavailable_videos.append(
                            (ytid, video['metadata']['action']))
                        continue
                with open(full_video, 'wb') as f:
                    f.write(vid_bytes)
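
            # 'scene' metadata names end in "<startframe>-<endframe>"; the block
            # below converts those frame indices to seconds using the clip fps and
            # trims the downloaded video to just that shot.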

            if video['metadata']['scene'] is not None:
                shot_endpoints = video['metadata']['scene'].rsplit('_', 1)[1]
                start, end = shot_endpoints.split('-')
                clip = VideoFileClip(str(full_video))
                subclip = clip.subclip(
                    int(start) / clip.fps,
                    int(end) / clip.fps)
                subclip.write_videofile(str(video_path),
                                        audio=False,
                                        verbose=False,
                                        progress_bar=False)
            else:
                shutil.copy2(full_video, video_path)
        videos_to_dump.append((video['name'], video_path, frame_output))

    dump_tao_frames([x[1] for x in videos_to_dump],
                    [x[2] for x in videos_to_dump], workers)
    for video, video_path, frame_dir in videos_to_dump:
        remove_non_tao_frames(frame_dir, set(checksums[video].keys()))
        assert are_tao_frames_dumped(frame_dir, checksums[video]), (
            f'Not all TAO frames for {video} were extracted.')

    if unavailable_videos:
        missing_path = file_path(missing_dir / 'missing.txt')
        logging.error('\n'.join([
            '',
            f'{len(unavailable_videos)} video(s) could not be downloaded; '
            'please request them from the HACS website by uploading ',
            f'\t{missing_path}',
            'to the following form',
            '\thttps://goo.gl/forms/0STStcLndI32oke22',
            'See the following README for details:',
            '\thttps://github.com/hangzhaomit/HACS-dataset#request-testing-videos-and-missing-videos-new',
        ]))

        with open(missing_path, 'w') as f:
            csv.writer(f).writerows(unavailable_videos)

    if len(unavailable_videos) > 20:
        logging.error(
            fill('NOTE: Over 20 HACS videos were unavailable. This may mean '
                 'that YouTube is rate-limiting your download; please try '
                 'running this script again after a few hours, or on a '
                 'different machine.'))
Example #17
    def __init__(self, *video_paths, audio=None):
        self.audio = audio
        self.videos = [VideoFileClip(path) for path in video_paths]
Example #18
def process_image(base_img):
    global BASE_IMG, CANNY_IMG
    BASE_IMG = base_img
    ysize = base_img.shape[0]
    xsize = base_img.shape[1]
    image = to_hsv(base_img)
    image = gaussian_blur(image, 3)
    image = filter_color(image)
    image = canny(image, 30, 130)
    CANNY_IMG = image
    image = region_of_interest(
        image,
        np.array([[(40, ysize), ((xsize / 2) - 20, ysize / 2 + 40),
                   ((xsize / 2) + 20, ysize / 2 + 40), (xsize - 40, ysize)]],
                 dtype=np.int32))
    image = hough_lines(image, 1, np.pi / 90, 10, 15, 10)

    # return image
    return weighted_img(image, base_img, β=250.)


src_img = (mpimg.imread('./test_images/solidYellowLeft.jpg') *
           255).astype('uint8')
src_img = process_image(src_img)
plt.imshow(src_img, cmap='hsv_r')
plt.show()

white_output = 'challengeDone.mp4'
clip1 = VideoFileClip('challenge.mp4')  # .subclip(14, 16)
white_clip = clip1.fl_image(
    process_image)  # NOTE: this function expects color images!!
white_clip.write_videofile(white_output, audio=False)
Example #19
    def _load_clip(self):
        audio_fps = AudioStim.get_sampling_rate(self.filename)
        self.clip = VideoFileClip(self.filename, audio_fps=audio_fps)
Example #20
def test(cfg):
    """
    Perform multi-view testing/feature extraction on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """

    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    cu.load_test_checkpoint(cfg, model)

    vid_root = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, cfg.DATA.PATH_PREFIX)
    videos_list_file = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, "vid_list.csv")

    print("Loading Video List ...")
    with open(videos_list_file) as f:
        videos = sorted(
            [x.strip() for x in f.readlines() if len(x.strip()) > 0])
    print("Done")
    print("----------------------------------------------------------")

    if cfg.DATA.READ_VID_FILE:
        rejected_vids = []

    print("{} videos to be processed...".format(len(videos)))
    print("----------------------------------------------------------")

    start_time = time.time()
    for vid_no, vid in enumerate(videos):
        # Create video testing loaders.
        path_to_vid = os.path.join(vid_root, os.path.split(vid)[0])
        vid_id = os.path.split(vid)[1]

        if cfg.DATA.READ_VID_FILE:
            try:
                _ = VideoFileClip(
                    os.path.join(path_to_vid, vid_id) + cfg.DATA.VID_FILE_EXT,
                    audio=False,
                    fps_source="fps",
                )
            except Exception as e:
                print("{}. {} cannot be read with error {}".format(
                    vid_no, vid, e))
                print(
                    "----------------------------------------------------------"
                )
                rejected_vids.append(vid)
                continue

        out_path = os.path.join(cfg.OUTPUT_DIR, os.path.split(vid)[0])
        out_file = vid_id.split(".")[0] + "_{}.npy".format(cfg.DATA.NUM_FRAMES)
        if os.path.exists(os.path.join(out_path, out_file)):
            print("{}. {} already exists".format(vid_no, out_file))
            print("----------------------------------------------------------")
            continue

        print("{}. Processing {}...".format(vid_no, vid))

        dataset = VideoSet(cfg,
                           path_to_vid,
                           vid_id,
                           read_vid_file=cfg.DATA.READ_VID_FILE)
        test_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=cfg.TEST.BATCH_SIZE,
            shuffle=False,
            sampler=None,
            num_workers=cfg.DATA_LOADER.NUM_WORKERS,
            pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
            drop_last=False,
        )

        # Perform multi-view test on the entire dataset.
        feat_arr = perform_inference(test_loader, model, cfg)

        os.makedirs(out_path, exist_ok=True)
        np.save(os.path.join(out_path, out_file), feat_arr)
        print("Done.")
        print("----------------------------------------------------------")

    if cfg.DATA.READ_VID_FILE:
        print("Rejected Videos: {}".format(rejected_vids))

    end_time = time.time()
    hours, minutes, seconds = calculate_time_taken(start_time, end_time)
    print("Time taken: {} hour(s), {} minute(s) and {} second(s)".format(
        hours, minutes, seconds))
    print("----------------------------------------------------------")
Example #21
File: Select.py Project: hyunbool/chuka
    def makeHighlight(self):
        filename = self.f_label.text()
        cap = cv2.VideoCapture(filename)
        video_for_cut = VideoFileClip(filename)
        fps = cap.get(cv2.CAP_PROP_FPS)
        #sys.path.append("..")

        MODEL_NAME = 'soccer_highlight_goal2'

        # Path to frozen detection graph. This is the actual model that is used for the object detection.
        PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'

        # List of the strings that is used to add correct label for each box.
        PATH_TO_LABELS = os.path.join('training', 'object-detection.pbtxt')

        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.io.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

        def load_image_into_numpy_array(image):
            (im_width, im_height) = image.size
            return np.array(image.getdata()).reshape(
                (im_height, im_width, 3)).astype(np.uint8)

        count = 1
        cut_count = 0

        hightlight = []
        cut = []
        with detection_graph.as_default():
            with tf.compat.v1.Session(graph=detection_graph) as sess:
                while True:
                    ret, image_np = cap.read()
                    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                    image_np_expanded = np.expand_dims(image_np, axis=0)
                    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
                    # Each box represents a part of the image where a particular object was detected.
                    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                    # Each score represent how level of confidence for each of the objects.
                    # Score is shown on the result image, together with the class label.
                    scores = detection_graph.get_tensor_by_name('detection_scores:0')
                    classes = detection_graph.get_tensor_by_name('detection_classes:0')
                    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
                    # Actual detection.

                    try:
                        (boxes, scores, classes, num_detections) = sess.run(
                            [boxes, scores, classes, num_detections],
                            feed_dict={image_tensor: image_np_expanded})
                    # When the video ends, this moves on to the highlight UI.
                    except TypeError:
                        self.window = Ui_HighlightWindow()
                        self.window.show()

                        break

                    # Visualization of the results of a detection.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8)

                    if (int(cap.get(1)) % 8 == 0):

                        title = "%d.jpg" % count
                        count += 1
                        # cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
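
                        # Every 8th frame: if the top detection score exceeds 99.7%,
                        # the frame is counted as part of a highlight; when a run of
                        # highlight frames ends, the corresponding frame index is
                        # appended to 'cut' as a clip boundary.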

                        if (float(100 * scores[0][0]) > 99.7):
                            print(title)
                            hightlight.append(count)
                        else:
                            if (count - 1 in hightlight and count + 1 not in hightlight):
                                cut.append(8 * (count - 4))
                                print(count - 1)

                        if (len(cut) > 1):
                            duration1 = cut[0] / fps
                            duration2 = cut[1] / fps
                            length = duration2 - duration1
                            print(length)
                            if (length > 60 or length <= 2):
                                cut[0] = cut[1]
                                del (cut[1])
                            else:
                                start_hour = (duration1 / 3600)
                                start_min = ((duration1 % 3600) / 60)
                                start_sec = duration1 % 60

                                end_hour = (duration2 / 3600)
                                end_min = ((duration2 % 3600) / 60)
                                end_sec = duration2 % 60

                                tmp_video = video_for_cut.subclip(duration1, duration2)
                                tmp_title = "./videos/%d+%d+%d~%d+%d+%d.mp4" % (
                                    start_hour, start_min, start_sec, end_hour, end_min, end_sec)
                                cut_count += 1
                                tmp_video.write_videofile(tmp_title, codec='libx264')
                                cut = []
Example #22
        )
    )

    image = hough_lines(image, 1, np.pi / 90, 100, 15, 10)
    #print(BASE_IMG.shape, ' - ', CANNY_IMG.shape, ' - ', HOUGH_IMG.shape)

    return weightImage(image, base_img, β=250.)


# image = mimg.imread('test3.jpg')
# new_img = processImage(image)
# new_file = 'test3Processed.jpg'
# # mplt.imshow(new_img, cmap='gray')
# # mplt.show()
# mimg.imsave(new_file,new_img)


inputfile = 'challenge2_short'
outputfile = inputfile + '_outputCOMBO'
clip1 = VideoFileClip(inputfile+'.mp4')  
white_clip = clip1.fl_image(processImage)  # NOTE: this function expects color images!!
white_clip.write_videofile(outputfile+'.mp4', audio=False)
Example #23

    # Download youtube video
    ###########################################################################
    if not args.already_downloaded:
        remove_video_file(args.tmp_filepath)
        subprocess.check_call(
            'youtube-dl --no-continue --output "{}" "{}"'.format(
                args.tmp_filepath,
                args.youtube_url,
            ),
            shell=True,
        )

    clip = VideoFileClip(
        args.tmp_filepath,
        audio=False,
    )
    clip_frame0 = clip.get_frame(0)
    clip_resolution = (len(clip_frame0), len(clip_frame0[0]))
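    # If the downloaded clip is not already at TARGET_RESOLUTION, reopen it and let
    # ffmpeg rescale frames on the fly; a separate native-resolution handle is kept
    # as high_res_clip.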
    if clip_resolution != TARGET_RESOLUTION:
        clip.reader.close()
        clip = VideoFileClip(
            args.tmp_filepath,
            audio=False,
            target_resolution=TARGET_RESOLUTION,
            resize_algorithm='fast_bilinear',
        )
    high_res_clip = VideoFileClip(
        args.tmp_filepath,
        audio=False,
        resize_algorithm='fast_bilinear',
Example #24
def process_video_file(filepath,
                       output_dir=None,
                       suffix=None,
                       audio_model=None,
                       image_model=None,
                       input_repr="mel256",
                       content_type="music",
                       audio_embedding_size=6144,
                       audio_center=True,
                       audio_hop_size=0.1,
                       image_embedding_size=8192,
                       audio_batch_size=32,
                       image_batch_size=32,
                       overwrite=False,
                       verbose=True):
    """
    Computes and saves L3 audio and video frame embeddings for a given video file

    Note that image embeddings are computed for every frame of the video. Also
    note that embeddings for the audio and images are not temporally aligned.
    Please refer to the timestamps in the output files for the corresponding
    timestamps for each set of embeddings.

    Parameters
    ----------
    filepath : str or list[str]
        Path or list of paths to video file(s) to be processed.
    output_dir : str or None
        Path to directory for saving output files. If None, output files will
        be saved to the directory containing the input file.
    suffix : str or None
        String to be appended to the output filename,
        i.e. <base filename>_<modality>_<suffix>.npz.
        If None, then no suffix will be added,
        i.e. <base filename>_<modality>.npz.
    audio_model : keras.models.Model or None
        Loaded audio model object. If a model is provided, then `input_repr`,
        `content_type`, and `embedding_size` will be ignored.
        If None is provided, the model will be loaded using
        the provided values of `input_repr`, `content_type` and
        `embedding_size`.
    image_model : keras.models.Model or None
        Loaded image model object. If a model is provided, then `input_repr`,
        `content_type`, and `embedding_size` will be ignored.
        If None is provided, the model will be loaded using
        the provided values of `input_repr`, `content_type` and
        `embedding_size`.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for audio model. Ignored if `model` is
        a valid Keras model.
    content_type : "music" or "env"
        Type of content used to train the embedding model. Ignored if `model` is
        a valid Keras model.
    audio_embedding_size : 6144 or 512
        Audio embedding dimensionality. Ignored if `model` is a valid Keras model.
    audio_center : boolean
        If True, pads beginning of audio signal so timestamps correspond
        to center of window.
    audio_hop_size : float
        Hop size in seconds.
    image_embedding_size : 8192 or 512
        Video frame embedding dimensionality. Ignored if `model` is a valid Keras model.
    audio_batch_size : int
        Batch size used for input to audio embedding model
    image_batch_size : int
        Batch size used for input to image embedding model
    overwrite : bool
        If True, overwrites existing output files
    verbose : bool
        If True, prints verbose messages.

    Returns
    -------

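    Examples
    --------
    A usage sketch (the paths below are placeholders, not from the original source)::

        process_video_file('clips/example.mp4', output_dir='embeddings/',
                           content_type='env', audio_embedding_size=512,
                           image_embedding_size=512)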
    """
    if isinstance(filepath, six.string_types):
        filepath_list = [filepath]
    elif isinstance(filepath, list):
        filepath_list = filepath
    else:
        err_msg = 'filepath should be type str or list[str], but got {}.'
        raise OpenL3Error(err_msg.format(filepath))

    # Load models
    if not audio_model:
        audio_model = load_audio_embedding_model(input_repr, content_type,
                                                 audio_embedding_size)
    if not image_model:
        image_model = load_image_embedding_model(input_repr, content_type,
                                                 image_embedding_size)

    audio_suffix, image_suffix = "audio", "image"
    if suffix:
        audio_suffix += "_" + suffix
        image_suffix += "_" + suffix

    audio_list = []
    sr_list = []
    audio_batch_filepath_list = []
    total_audio_batch_size = 0

    image_list = []
    frame_rate_list = []
    image_batch_filepath_list = []

    num_files = len(filepath_list)
    for file_idx, filepath in enumerate(filepath_list):

        if not os.path.exists(filepath):
            raise OpenL3Error('File "{}" could not be found.'.format(filepath))

        if verbose:
            print("openl3: Processing {} ({}/{})".format(
                filepath, file_idx + 1, num_files))

        # Skip if overwriting isn't enabled and output file exists
        audio_output_path = get_output_path(filepath,
                                            audio_suffix + ".npz",
                                            output_dir=output_dir)
        image_output_path = get_output_path(filepath,
                                            image_suffix + ".npz",
                                            output_dir=output_dir)
        skip_audio = os.path.exists(audio_output_path) and not overwrite
        skip_image = os.path.exists(image_output_path) and not overwrite

        if skip_audio and skip_image:
            err_msg = "openl3: {} and {} exist, skipping."
            print(err_msg.format(audio_output_path, image_output_path))
            continue

        try:
            clip = VideoFileClip(filepath,
                                 target_resolution=(256, 256),
                                 audio_fps=TARGET_SR)
            audio = clip.audio.to_soundarray(fps=TARGET_SR)
            images = np.array([frame for frame in clip.iter_frames()])
        except Exception:
            err_msg = 'Could not open file "{}":\n{}'
            raise OpenL3Error(err_msg.format(filepath, traceback.format_exc()))

        if not skip_audio:
            audio_list.append(audio)
            sr_list.append(TARGET_SR)
            audio_batch_filepath_list.append(filepath)
            audio_len = audio.shape[0]
            audio_hop_length = int(audio_hop_size * TARGET_SR)
            num_windows = 1 + max(
                ceil((audio_len - TARGET_SR) / float(audio_hop_length)), 0)
            total_audio_batch_size += num_windows
        else:
            err_msg = "openl3: {} exists, skipping audio embedding extraction."
            print(err_msg.format(audio_output_path))

        if not skip_image:
            image_list.append(images)
            frame_rate_list.append(int(clip.fps))
            image_batch_filepath_list.append(filepath)
        else:
            err_msg = "openl3: {} exists, skipping image embedding extraction."
            print(err_msg.format(image_output_path))

        if (total_audio_batch_size >= audio_batch_size
                or file_idx == (num_files - 1)) and len(audio_list) > 0:
            embedding_list, ts_list \
                = get_audio_embedding(audio_list, sr_list, model=audio_model,
                                      input_repr=input_repr,
                                      content_type=content_type,
                                      embedding_size=audio_embedding_size,
                                      center=audio_center,
                                      hop_size=audio_hop_size,
                                      batch_size=audio_batch_size,
                                      verbose=verbose)
            for fpath, embedding, ts in zip(audio_batch_filepath_list,
                                            embedding_list, ts_list):
                output_path = get_output_path(fpath,
                                              audio_suffix + ".npz",
                                              output_dir=output_dir)

                np.savez(output_path, embedding=embedding, timestamps=ts)
                assert os.path.exists(output_path)

                if verbose:
                    print("openl3: Saved {}".format(output_path))

            audio_list = []
            sr_list = []
            audio_batch_filepath_list = []
            total_audio_batch_size = 0

        if (len(image_list) >= image_batch_size
                or file_idx == (num_files - 1)) and len(image_list) > 0:
            embedding_list, ts_list \
                = get_image_embedding(image_list, frame_rate_list,
                                      model=image_model, input_repr=input_repr,
                                      content_type=content_type,
                                      embedding_size=image_embedding_size,
                                      batch_size=image_batch_size,
                                      verbose=verbose)
            for fpath, embedding, ts in zip(image_batch_filepath_list,
                                            embedding_list, ts_list):
                output_path = get_output_path(fpath,
                                              image_suffix + ".npz",
                                              output_dir=output_dir)

                np.savez(output_path, embedding=embedding, timestamps=ts)
                assert os.path.exists(output_path)

                if verbose:
                    print("openl3: Saved {}".format(output_path))

            image_list = []
            frame_rate_list = []
            image_batch_filepath_list = []
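
A minimal usage sketch for the function above, assuming it is exposed at the package top level as openl3.process_video_file; the input paths, output directory, and suffix below are placeholders:

import openl3

# Hypothetical input clips; one <name>_audio_run1.npz and one
# <name>_image_run1.npz file is written per input into embeddings/.
openl3.process_video_file(
    ["clips/match_01.mp4", "clips/match_02.mp4"],
    output_dir="embeddings/",
    suffix="run1",
    input_repr="mel256",
    content_type="music",
    audio_embedding_size=512,
    image_embedding_size=512,
    overwrite=True,
)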
Example #25
def test_write_gif_ImageMagick():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0.2, 0.5)
    location = os.path.join(TMP_DIR, "imagemagick_gif.gif")
    clip.write_gif(location, program="ImageMagick")
    close_all_clips(locals())
Example #26
def download_ava(root,
                 annotations,
                 checksums,
                 workers=8,
                 movies_dir=None):
    if movies_dir is None:
        movies_dir = root / 'cache' / 'ava_movies'
        movies_dir.mkdir(exist_ok=True, parents=True)

    logging.info('Downloading AVA videos.')
    videos = [
        v for v in annotations['videos'] if v['metadata']['dataset'] == 'AVA'
    ]

    movie_clips = defaultdict(list)
    for v in videos:
        movie_clips[v['metadata']['movie']].append(v)

    movie_info = ava_load_meta()

    videos_dir = root / 'videos'
    frames_root = root / 'frames'
    for movie_stem, clips in tqdm(movie_clips.items(),
                                  desc='Processing AVA movies'):
        movie = f"{movie_stem}.{movie_info[movie_stem]['ext']}"

        # List of (clip, output clip path, output frames directory) for clips
        # whose frames have not already been extracted.
        to_process = []
        for clip in clips:
            name = clip['name']
            output_clip = file_path(videos_dir / f"{name}.mp4")
            output_frames = dir_path(frames_root / name)
            if are_tao_frames_dumped(output_frames,
                                     checksums[name],
                                     warn=False):
                logging.debug(f'Skipping extracted clip: {name}')
                continue
            to_process.append((clip, output_clip, output_frames))

        # Download movie if necessary.
        if all(x[1].exists() for x in to_process):
            movie_vfc = None
        else:
            if movies_dir and (movies_dir / movie).exists():
                downloaded_movie_this_run = False
                movie_path = movies_dir / movie
                logging.debug(f'Found AVA movie {movie} at {movie_path}')
            else:
                downloaded_movie_this_run = True
                movie_path = movies_dir / movie
                if not movie_path.exists():
                    logging.debug(f'Downloading AVA movie: {movie}.')
                    url = (
                        f"{AVA_URL}/{movie_info[movie_stem]['split']}/{movie}")
                    urllib.request.urlretrieve(url, movie_path)
            movie_vfc = VideoFileClip(str(movie_path))

        for clip_info, clip_path, frames_dir in tqdm(to_process,
                                                     desc='Extracting shots',
                                                     leave=False):
            if clip_path.exists():
                continue
            shot_endpoints = clip_info['metadata']['scene'].rsplit('_', 1)[1]
            start, end = shot_endpoints.split('-')
            subclip = movie_vfc.subclip(
                int(start) / movie_vfc.fps,
                int(end) / movie_vfc.fps)
            subclip.write_videofile(str(clip_path),
                                    audio=False,
                                    verbose=False,
                                    progress_bar=False)
            close_clip(subclip)

        if movie_vfc:
            close_clip(movie_vfc)
            if downloaded_movie_this_run:
                movie_path.unlink()

        logging.debug(
            f'AVA: Dumping TAO frames:\n{[x[1:] for x in to_process]}')
        dump_tao_frames([x[1] for x in to_process], [x[2] for x in to_process],
                        workers)
        for clip, clip_path, frame_dir in to_process:
            if not are_tao_frames_dumped(frame_dir, checksums[clip['name']]):
                raise ValueError(
                    f'Not all TAO frames for {clip["name"]} were extracted. '
                    f'Try deleting the clip at {clip_path} and running this '
                    f'script again.')
            remove_non_tao_frames(frame_dir,
                                  set(checksums[clip['name']].keys()))
            assert are_tao_frames_dumped(frame_dir, checksums[clip['name']]), (
                f'ERROR: TAO frames were dumped properly for {clip["name"]}, '
                f'but were deleted by `remove_non_tao_frames`! This is a bug, '
                f'please report it.')
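
The shot-extraction step above turns frame-index endpoints into seconds by dividing by the movie's frame rate before cutting. A standalone sketch of that pattern with moviepy, using made-up file names and endpoints:

from moviepy.editor import VideoFileClip

movie = VideoFileClip("movies/example_movie.mkv")   # hypothetical source movie
start_frame, end_frame = 902, 1798                  # made-up shot endpoints in frames
# Convert frame indices to seconds and cut the shot without audio.
shot = movie.subclip(start_frame / movie.fps, end_frame / movie.fps)
shot.write_videofile("videos/example_movie_shot.mp4", audio=False)
shot.close()
movie.close()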
Example #27
def test_save_frame():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm")
    location = os.path.join(TMP_DIR, "save_frame.png")
    clip.save_frame(location, t=0.5)
    assert os.path.isfile(location)
    close_all_clips(locals())
Example #28
def test_cuts1():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").resize(0.2)
    assert cuts.find_video_period(clip) == pytest.approx(0.966666666667, 0.0001)
    close_all_clips(locals())
Example #29
            TxtFile = pureName + ".txt"
            TxtFile = os.path.join(MdPath, TxtFile)
            if os.path.exists(TxtFile):
                TxtPath = MdPath
            else:
                print("No srt or txt file and exist")
                final_out(21)
    else:
        if SrtPath is None or len(SrtPath) == 0:
            SrtPath = os.path.dirname(SrtFile)

    print("MP4 {}/{} TXT {}/{} SRT {}/{} ".format(MdPath, MdFile, TxtFile,
                                                  TxtPath, SrtFile, SrtPath))

    print("Reading length of the video")
    videoclip = VideoFileClip(MdFile)
    VideoLen = videoclip.duration
    if TxtFile:
        read_text_srt(TxtFile)
        convert_txt_to_srt(TxtFile, SrtFile)
    elif SrtFile:
        print("Load SrtFile {} directlly".format(SrtFile))
        load_srt_file(SrtFile)
    else:
        print("No text or srt file ")
        final_out(23)

    if num_itm:
        print("Convert txt to speach from baidu")
        baidu_client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
        # time.sleep(8)
Example #30
    def load_clip(self, filename: str):
        audio_fps = self.get_audio_sampling_rate(filename)
        # Pass the rate as a keyword argument; the second positional
        # parameter of VideoFileClip is has_mask, not audio_fps.
        self.clip = VideoFileClip(filename, audio_fps=audio_fps)
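
A self-contained sketch of how the surrounding (unshown) class might work; the ffprobe-based get_audio_sampling_rate helper is an assumption, since the original implementation is not included here:

import subprocess

from moviepy.editor import VideoFileClip


class ClipLoader:
    # Hypothetical container mirroring the snippet above.

    def get_audio_sampling_rate(self, filename: str) -> int:
        # Probe the first audio stream's native sample rate with ffprobe
        # (assumed to be available on PATH).
        out = subprocess.check_output([
            "ffprobe", "-v", "error", "-select_streams", "a:0",
            "-show_entries", "stream=sample_rate",
            "-of", "default=noprint_wrappers=1:nokey=1", filename,
        ])
        return int(out.decode().strip())

    def load_clip(self, filename: str):
        audio_fps = self.get_audio_sampling_rate(filename)
        # Decode the audio at its native rate instead of moviepy's default.
        self.clip = VideoFileClip(filename, audio_fps=audio_fps)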