def _compile_worker(session_key: str, video_id: str) -> None:
    # Use this for conditional creation
    config = get_nosql_handler().get_video_config(video_id)

    session_clips = get_sql_handler().get_session_clips_by_session_key(session_key)
    for session_clip in session_clips:
        download_session_clip(session_clip, sync=True)

    session_audios = get_sql_handler().get_session_audio_by_session_key(session_key)
    session_audio = None
    if session_audios:
        session_audio = session_audios[0]
        download_session_audio(session_audio, sync=True)

    # Create VideoFileClips
    clips = [VideoFileClip(session_clip.local_file_path()) for session_clip in session_clips]
    total_duration = sum([clip.duration for clip in clips])

    # Make all clips the same size
    final_w = min([clip.w for clip in clips])
    final_h = min([clip.h for clip in clips])
    clips = [resize(clip, newsize=(final_w, final_h)) for clip in clips]

    # Adding gamertag and logo to the video
    # gamertag = config.get('gamertag', '')
    # gamertag_position = config.get('gamertag_position', ['right', 'bottom'])
    #
    # if gamertag != '':
    #     gamertag_clip = TextClip(txt='@' + gamertag, fontsize=50, font='Comfortaa', color='white')
    #     gamertag_clip = gamertag_clip.set_duration(final.duration)\
    #         .margin(right=8, top=8, left=8, bottom=8, opacity=0)\
    #         .set_position((gamertag_position[0], gamertag_position[1]))

    # === WATERMARK ===
    logo_position = config.get('logo_position', ['left', 'top'])
    logo_clip = ImageClip('./reels/static/reels/reels-logo-white.png')
    logo_clip = resize(logo_clip, height=final_h / 5)
    try:
        logo_x = (0 if logo_position[0] == 'left' else final_w - logo_clip.w)
        logo_y = (0 if logo_position[1] == 'top' else final_h - logo_clip.h)
    except (KeyError, TypeError):
        logo_x, logo_y = 0, final_h - logo_clip.h
    logo_clip = logo_clip.set_pos((logo_x, logo_y))
    clips = [CompositeVideoClip([clip, logo_clip.set_duration(clip.duration)]) for clip in clips]

    # Concatenate clips
    final = concatenate_videoclips(clips, method="compose")

    # Add audio, only if there is audio
    audio_clip = None
    if session_audio:
        audio_clip = AudioFileClip(session_audio.local_file_path())
        audio_clip = audio_clip \
            .set_start(config.get('audio_start', 0)) \
            .set_end(config.get('audio_end', audio_clip.duration))

        # Attach audio to video, but make it only as long as the videos are
        # TODO: Manage case where videos are longer than audio clip
        final = final.set_audio(audio_clip.set_duration(final.duration))

    # If extra editing is enabled, do so
    if config.get('extras', False) and session_audio and get_file_type(session_audio.local_file_path()) == 'wav':
        fs, data = read(session_audio.local_file_path())
        data = data[:, 0]
        data = data[:len(data) - len(data) % 48000]
        data2 = np.mean(data.reshape(-1, int(48000 / 4)), axis=1)
        x = np.diff(data2, n=1)
        secs = np.where(x > 200)[0]
        t = list(secs[np.where(np.diff(secs) > 12)[0] + 1])
        if np.diff(secs)[0] > 12:
            t.insert(0, secs[0])
        for i in range(0, len(t)):
            t[i] /= 4
        for i in t:
            tfreeze = i
            if tfreeze + 1.75 >= final.duration:
                break
            clip_before = final.subclip(t_end=tfreeze)
            clip_after = final.subclip(t_start=tfreeze + 1)
            clip = final.subclip(t_start=tfreeze, t_end=tfreeze + 1)
            if int(i) % 2 == 0:
                clip = clip.fl_image(invert_colors).crossfadein(0.5).crossfadeout(0.5)
            else:
                clip = clip.fx(vfx.painting, saturation=1.6, black=0.006).crossfadein(0.5).crossfadeout(0.5)
            final = concatenate_videoclips([clip_before, clip, clip_after])

    # === Final Saving ===
    video = get_sql_handler().get_video(video_id)
    final.write_videofile(filename=video.local_file_path(), verbose=True, codec="libx264",
                          audio_codec='aac', temp_audiofile=f'temp-audio-{video.video_id}.m4a',
                          remove_temp=True, preset="medium",
                          ffmpeg_params=["-profile:v", "baseline", "-level", "3.0", "-pix_fmt", "yuv420p"])

    # Close local files because we don't need them anymore and so they can be removed later
    for clip in clips:
        clip.close()
    if audio_clip:
        audio_clip.close()

    # Upload to cold storage
    save_video(video, sync=True, clean=False)
def test_withoutaudio():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0.2, 0.6)
    new_clip = clip.without_audio()
    assert new_clip.audio is None
    close_all_clips(locals())
def test_write_gif_imageio():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0.2, 0.8)
    location = os.path.join(TMP_DIR, "imageio_gif.gif")
    clip.write_gif(location, program="imageio")
    assert os.path.isfile(location)
    close_all_clips(locals())
"""Main file which takes a video as input and cuts the AD sections out."""
import argparse
import re

from moviepy.video.io.VideoFileClip import VideoFileClip

ap = argparse.ArgumentParser()
ap.add_argument('-v', '--video', required=True)
ap.add_argument('-c', '--count', type=int, required=True)
ap.add_argument('-m', '--model', required=True)
ap.add_argument('-o', '--offset')
args = vars(ap.parse_args())

print(f'[STATUS] Cutting images every {args["count"]} seconds')

# Load video
video_f = VideoFileClip(args['video'])
image_data = []
multiplier = args['count']


def cut_images(start, end):
    # Save one frame every `multiplier` seconds for the given index range.
    for x in range(start, end + 1):
        video_f.save_frame(f'{TMP_PATH}/{str(x)}.jpg', t=x * multiplier)


total_frame_count = video_f.duration
image_cut_count = int(total_frame_count / multiplier)
num_threads = 8
for x in range(num_threads):
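    # The original snippet is truncated at this loop; the body below is a
    # hedged sketch (an assumption, not the original code) of how the frame
    # indices could be split across `num_threads` worker threads using the
    # `cut_images(start, end)` helper above. It assumes `import threading`
    # has been added next to the other imports.
    chunk = max(1, image_cut_count // num_threads)
    start = x * chunk
    # Let the last worker pick up any remainder of the range.
    end = image_cut_count if x == num_threads - 1 else min(image_cut_count, (x + 1) * chunk - 1)
    worker = threading.Thread(target=cut_images, args=(start, end))
    worker.start()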
def test_write_gif_ffmpeg_pixel_format():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0.2, 0.4)
    location = os.path.join(TMP_DIR, "ffmpeg_gif.gif")
    clip.write_gif(location, program="ffmpeg", pixel_format="bgr24")
    assert os.path.isfile(location)
    close_all_clips(locals())
def detect_on_video():
    video = VideoFileClip(IN_VIDEO_PATH)
    output_video = video.fl_image(process_video)
    output_video.write_videofile(OUT_VIDEO_PATH, audio=False)
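# `process_video` is not defined in the snippet above. `fl_image` expects a
# callable that takes one RGB frame (a numpy array) and returns a frame of the
# same shape, so a minimal stand-in (an assumption, not the original detector)
# could look like this:
def process_video(frame):
    # Identity pass-through; a real detector would draw its detections on `frame`.
    return frame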
def process(text_file, video_file, align_file, label_file):
    with open(text_file, 'r') as f:
        content = f.read()
    # TODO: decipher the gender of the speaker using the text file
    # (automate it, or do it all by hand?).
    time_pairs = get_alignments(align_file)
    lines = content.splitlines()
    times = []
    for line in lines:
        x = line.split(': ')[0].strip().split(' ')[1]
        # print(x)
        # print(float(x.split('-')[0].split('[')[1]))
        times.append([
            float(x.split('-')[0].split('[')[1]),
            float(x.split('-')[1].split(']')[0])
        ])
    main_features = []
    with VideoFileClip(video_file) as video:
        for i in times:
            feats = []
            new = video.subclip(i[0], i[1])
            new.write_videofile('video.mp4', audio_codec='aac')
            with VideoFileClip('video.mp4') as sentence_video:
                for k in time_pairs:
                    # Cut out the clip for the current word alignment.
                    word_video = sentence_video.subclip(k[0], k[1])
                    word_video.write_videofile('word_video.mp4', audio_codec='aac')
                    fImages = []
                    mImages = []
                    cap = cv2.VideoCapture('testing.mp4')
                    hog_face_detector = dlib.get_frontal_face_detector()
                    while True:
                        ret, img = cap.read()
                        # print(img)
                        if img is None:
                            break
                        height, width = img.shape[:2]
                        # Crop the left half of the frame.
                        start_row, start_col = int(0), int(0)
                        end_row, end_col = int(height), int(width * 0.5)
                        cropped_top = img[start_row:end_row, start_col:end_col]
                        faces_hog = hog_face_detector(cropped_top, 1)
                        for face in faces_hog:
                            x = face.left()
                            y = face.top()
                            w = face.right() - x
                            h = face.bottom() - y
                            cropping = cropped_top[y:y + h, x:x + w]
                            cropping = cv2.resize(cropping, (125, 125))
                            # cv2_imshow(cropping)
                            fImages.append(cropping)
                        # cv2_imshow(cropped_top)
                        cv2.waitKey(0)
                        cv2.destroyAllWindows()
                        # Starting pixel coordinates (top left of cropped bottom half).
                        start_row, start_col = int(0), int(width * 0.5)
                        # Ending pixel coordinates (bottom right of cropped bottom half).
                        end_row, end_col = int(height), int(width)
                        cropped_bot = img[start_row:end_row, start_col:end_col]
                        # print(start_row, end_row)
                        # print(start_col, end_col)
                        mImages.append(cropped_bot)
                        # cv2_imshow(cropped_bot)
                        cv2.waitKey(0)
                        cv2.destroyAllWindows()
def extract_audio(input_video_path, input_audio_path='./data/test.wav'):
    video = VideoFileClip(input_video_path)
    audio = video.audio
    audio.write_audiofile(input_audio_path, codec='pcm_s16le')
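# Minimal usage sketch for the helper above; the input path is a placeholder
# assumption, not part of the original snippet.
if __name__ == '__main__':
    extract_audio('./data/test_video.mp4', './data/test.wav')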
def get_test_video():
    return VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0, 1)
def align_videos(subject, date): frame_buffer = 10 base_video_path=os.path.join(cfg['preprocessed_data_dir'],subject, date,'video') with open(os.path.join(base_video_path,'config.json')) as json_file: vid_cfg=json.load(json_file) fnames=sorted(glob.glob(os.path.join(base_video_path, 'front', '%s*.avi' % date))) rois_checked={ 'front':False, 'side':False, 'top':False } crop_checked={ 'front':False, 'side':False, 'top':False } # For each file (filenames are same in each view directory) for fname in fnames: fname=os.path.split(fname)[-1] print('Processing %s' % fname) blue_onsets={} yellow_onsets={} blue_ts={} yellow_ts={} video_nframes={} # Whether or not to use LED for alignment led_based = True for view in cfg['camera_views']: video_path=os.path.join(base_video_path, view) clip = VideoFileClip(os.path.join(video_path, fname)) n_frames_approx = int(np.ceil(clip.duration * clip.fps) + frame_buffer) n_frames = n_frames_approx clip.reader.initialize() # Initialize LED time series for this view blue_ts[view]=[] yellow_ts[view]=[] for index in range(n_frames_approx): image = img_as_ubyte(clip.reader.read_frame()) # If not already set, show GUI to select blue LED ROI if not rois_checked[view]: blue_led_roi_area=vid_cfg['blue_led_roi_areas'][view] blue_cropped_img=image[blue_led_roi_area[2]:blue_led_roi_area[3], blue_led_roi_area[0]:blue_led_roi_area[1],:] init_roi=None if view in vid_cfg['blue_led_rois'] and vid_cfg['blue_led_rois'][view] is not None: init_roi=vid_cfg['blue_led_rois'][view] init_roi[0] = init_roi[0] - blue_led_roi_area[0] init_roi[1] = init_roi[1] - blue_led_roi_area[0] init_roi[2] = init_roi[2] - blue_led_roi_area[2] init_roi[3] = init_roi[3] - blue_led_roi_area[2] vid_cfg['blue_led_rois'][view] = select_crop_parameters.show(blue_cropped_img, 'Select blue LED ROI', init_coords=init_roi) vid_cfg['blue_led_rois'][view][0] = vid_cfg['blue_led_rois'][view][0] + blue_led_roi_area[0] vid_cfg['blue_led_rois'][view][1] = vid_cfg['blue_led_rois'][view][1] + blue_led_roi_area[0] vid_cfg['blue_led_rois'][view][2] = vid_cfg['blue_led_rois'][view][2] + blue_led_roi_area[2] vid_cfg['blue_led_rois'][view][3] = vid_cfg['blue_led_rois'][view][3] + blue_led_roi_area[2] yellow_led_roi_area = vid_cfg['yellow_led_roi_areas'][view] yellow_cropped_img = image[yellow_led_roi_area[2]:yellow_led_roi_area[3], yellow_led_roi_area[0]:yellow_led_roi_area[1], :] init_roi = None if view in vid_cfg['yellow_led_rois'] and vid_cfg['yellow_led_rois'][view] is not None: init_roi = vid_cfg['yellow_led_rois'][view] init_roi[0] = init_roi[0] - yellow_led_roi_area[0] init_roi[1] = init_roi[1] - yellow_led_roi_area[0] init_roi[2] = init_roi[2] - yellow_led_roi_area[2] init_roi[3] = init_roi[3] - yellow_led_roi_area[2] vid_cfg['yellow_led_rois'][view] = select_crop_parameters.show(yellow_cropped_img, 'Select yellow LED ROI', init_coords=init_roi) vid_cfg['yellow_led_rois'][view][0] = vid_cfg['yellow_led_rois'][view][0] + yellow_led_roi_area[0] vid_cfg['yellow_led_rois'][view][1] = vid_cfg['yellow_led_rois'][view][1] + yellow_led_roi_area[0] vid_cfg['yellow_led_rois'][view][2] = vid_cfg['yellow_led_rois'][view][2] + yellow_led_roi_area[2] vid_cfg['yellow_led_rois'][view][3] = vid_cfg['yellow_led_rois'][view][3] + yellow_led_roi_area[2] rois_checked[view]=True if index == int(n_frames_approx - frame_buffer * 2): last_image = image elif index > int(n_frames_approx - frame_buffer * 2): if (image == last_image).all(): n_frames = index break # Crop image around blue LED, get only blue channel 
blue_roi=vid_cfg['blue_led_rois'][view] blue_led_image = image[blue_roi[2]:blue_roi[3], blue_roi[0]:blue_roi[1], 2] # Add average of cropped image to blue LED timeseries blue_ts[view].append(np.mean(blue_led_image)) # Crop image around yellow LED, average red and green channels yellow_roi=vid_cfg['yellow_led_rois'][view] yellow_led_image = np.mean(image[yellow_roi[2]:yellow_roi[3], yellow_roi[0]:yellow_roi[1], 0:1],axis=2) # Add average of cropped image to yellow LED timeseries yellow_ts[view].append(np.mean(yellow_led_image)) blue_ts[view] = np.array(blue_ts[view]) yellow_ts[view] = np.array(yellow_ts[view]) # Normalize based on first 10 time steps if len(blue_ts[view])>10: blue_ts[view]=(blue_ts[view]-np.mean(blue_ts[view][0:10]))/np.mean(blue_ts[view][0:10]) if len(yellow_ts[view])>10: yellow_ts[view]=(yellow_ts[view]-np.mean(yellow_ts[view][0:10]))/np.mean(yellow_ts[view][0:10]) blue_ts[view]=blue_ts[view]/np.max(blue_ts[view]) yellow_ts[view]=yellow_ts[view]/np.max(yellow_ts[view]) # plt.figure() # plt.subplot(2,1,1) # plt.plot(video_blue_brightness) # plt.subplot(2, 1, 2) # plt.plot(video_yellow_brightness) # plt.show() # Get derivative of blue and yellow ts #blue_diff=np.diff(blue_ts[view]) #yellow_diff=np.diff(yellow_ts[view]) # Get peak blue and yellow LED change times #blue_peak=np.max(blue_diff) blue_peak = np.max(blue_ts[view]) #yellow_peak=np.max(yellow_diff) yellow_peak = np.max(yellow_ts[view]) # If none above 0.05, don't use LEDs for aligning #if blue_peak<.05 or yellow_peak<.05: if len(blue_ts[view])<10 or np.max(blue_ts[view][10:]) < .25 or np.max(yellow_ts[view])<.25: led_based=False print('Cant figure out LED onset - not using') # Otherwise, use the first time point after 25 time points where LED diff exceeds 0.05 else: #blue_onsets[view] = np.where(blue_diff >= 0.05)[0][0] #np.where(blue_ts[view] >= 0.25)[0][0] blue_onsets[view] = 10 + np.where(blue_ts[view][10:] >= 0.25)[0][0] #yellow_onsets[view] = np.where(yellow_diff >= 0.05)[0][0] yellow_onsets[view] = np.where(yellow_ts[view] >= 0.25)[0][0] video_nframes[view]=n_frames # Use first view where blue LED exceeds threshold as reference to align to if len(blue_onsets.values())>0: min_blue_onset=min(blue_onsets.values()) # if fname=='15-05-2019_10-34-15_11.avi': # plt.figure() # for view in cfg['camera_views']: # plt.plot(blue_ts[view], label='%s: blue' % view) # plt.plot(yellow_ts[view], label='%s: yellow' % view) # plt.legend() # plt.show() # Compute trial duration based on each view trial_durations={} for view in cfg['camera_views']: if view in blue_onsets and view in yellow_onsets: # Trial duration (in ms) trial_duration=(yellow_onsets[view]-blue_onsets[view])*1.0/clip.fps*1000.0 # there is an 850ms delay before blue LED comes on if trial_duration>0: trial_duration=trial_duration+850.0 trial_durations[view]=trial_duration print('%s: %.2fms' % (view, trial_duration)) #assert(len(trial_durations)>0 and all(x == trial_durations[0] for x in trial_durations)) start_frames_to_cut={} n_frames_after_cutting = {} # Cut frames to align videos and crop for idx,view in enumerate(cfg['camera_views']): # using LED to align if led_based: start_frames_to_cut[view]=blue_onsets[view]-min_blue_onset # otherwise - use standard # of frames to crop (order of video triggering is top, side, front) if not led_based or start_frames_to_cut[view]>5: start_frames_to_cut[view] = 0 if view=='front': start_frames_to_cut[view]=2 elif view=='side': start_frames_to_cut[view]=1 n_frames_after_cutting[view]=video_nframes[view]-start_frames_to_cut[view] 
new_nframes=min(n_frames_after_cutting.values()) intrinsic_files = {} for view in cfg['camera_views']: dlc3d_cfg = os.path.join('/data/tool_learning/preprocessed_data/dlc_projects', 'visual_grasp_3d-Jimmy-2019-08-19-3d', 'config.yaml') cfg_3d = auxiliaryfunctions.read_config(dlc3d_cfg) img_path, path_corners, path_camera_matrix, path_undistort = auxiliaryfunctions_3d.Foldernames3Dproject( cfg_3d) path_intrinsic_file = os.path.join(path_camera_matrix, '%s_intrinsic_params.pickle' % view) intrinsic_file = auxiliaryfunctions.read_pickle(path_intrinsic_file) intrinsic_files[view] = intrinsic_file[view] for idx, view in enumerate(cfg['camera_views']): camera_matrix = intrinsic_files[view]['mtx'] distortion_coefficients = intrinsic_files[view]['dist'] end_frames_to_cut=n_frames_after_cutting[view]-new_nframes print('cutting %d frames from beginning and %d frames from end of %s' % (start_frames_to_cut[view], end_frames_to_cut, view)) # Cut frames from blue and yellow LED time series and onsets if end_frames_to_cut>0: blue_ts[view]=blue_ts[view][start_frames_to_cut[view]:-end_frames_to_cut] yellow_ts[view] = yellow_ts[view][start_frames_to_cut[view]:-end_frames_to_cut] else: blue_ts[view] = blue_ts[view][start_frames_to_cut[view]:] yellow_ts[view] = yellow_ts[view][start_frames_to_cut[view]:] if view in blue_onsets: blue_onsets[view]=blue_onsets[view]-start_frames_to_cut[view] if view in yellow_onsets: yellow_onsets[view]=yellow_onsets[view]-start_frames_to_cut[view] # Load video and cut frames from beginning video_path = os.path.join(base_video_path, view) clip = VideoFileClip(os.path.join(video_path, fname)) # Crop limits based on view frames=[] n_frames_approx = int(np.ceil(clip.duration * clip.fps)+frame_buffer) for index in range(n_frames_approx): image = img_as_ubyte(clip.reader.read_frame()) image = cv2.undistort(image, camera_matrix, distortion_coefficients) if index>=start_frames_to_cut[view]: if not crop_checked[view]: init_crop_lims = None if view in vid_cfg['crop_limits'] and vid_cfg['crop_limits'][view] is not None: init_crop_lims = vid_cfg['crop_limits'][view] vid_cfg['crop_limits'][view] = select_crop_parameters.show(image, 'Select crop limits', init_coords=init_crop_lims) crop_checked[view]=True # Crop image and save to video crop_lims=vid_cfg['crop_limits'][view] image=image[crop_lims[2]:crop_lims[3], crop_lims[0]:crop_lims[1], :] frames.append(image) if len(frames)==new_nframes: break clip.close() # Check that have the right number of frames assert(len(frames)==new_nframes) # Create new video clip (cropped and aligned) video_path = os.path.join(base_video_path, view) new_clip = VideoProcessorCV(sname=os.path.join(video_path, fname), fps=clip.fps, codec='mp4v', sw=crop_lims[1] - crop_lims[0], sh=crop_lims[3] - crop_lims[2]) for frame in frames: new_clip.save_frame(np.uint8(frame)) new_clip.close() # Make everything hashable for view in cfg['camera_views']: blue_ts[view]=blue_ts[view].tolist() yellow_ts[view] = yellow_ts[view].tolist() if view in blue_onsets: blue_onsets[view]=int(blue_onsets[view]) if view in yellow_onsets: yellow_onsets[view]=int(yellow_onsets[view]) if view in trial_durations: trial_durations[view]=float(trial_durations[view]) # Save video info to JSON data = { 'blue_roi': vid_cfg['blue_led_rois'], 'yellow_roi': vid_cfg['yellow_led_rois'], 'blue_ts': blue_ts, 'yellow_ts': yellow_ts, 'blue_onset': blue_onsets, 'yellow_onset': yellow_onsets, 'trial_duration': trial_durations, 'fname': fname } [base, ext] = os.path.splitext(fname) with 
open(os.path.join(base_video_path, '%s.json' % base), 'w') as outfile: json.dump(data, outfile) print('') with open(os.path.join(base_video_path,'config.json'),'w') as outfile: json.dump(vid_cfg, outfile)
def combine_video(base_video_path, fnames, out_path, out_fname): # Output video size out_size = [2040, 1084] frame_buffer = 10 # Process videos front_video = VideoFileClip(os.path.join(base_video_path, 'front', fnames['front'])) side_video = VideoFileClip(os.path.join(base_video_path, 'side', fnames['side'])) top_video = VideoFileClip(os.path.join(base_video_path, 'top', fnames['top'])) if '3d' in fnames: video_3d=VideoFileClip(os.path.join(base_video_path, fnames['3d'])) fps = front_video.fps # Create new clip and write frames new_clip = VideoProcessorCV( sname=os.path.join(out_path, out_fname), fps=fps, codec='mp4v', sw=out_size[0], sh=out_size[1]) n_frames_approx = int(np.ceil(front_video.duration * front_video.fps) + frame_buffer) n_frames = n_frames_approx front_video.reader.initialize() side_video.reader.initialize() top_video.reader.initialize() if '3d' in fnames: video_3d.reader.initialize() for index in range(n_frames_approx): front_image = img_as_ubyte(front_video.reader.read_frame()) if index == int(n_frames_approx - frame_buffer * 2): last_front_image = front_image elif index > int(n_frames_approx - frame_buffer * 2): if (front_image == last_front_image).all(): n_frames = index break side_image = img_as_ubyte(side_video.reader.read_frame()) if index == int(n_frames_approx - frame_buffer * 2): last_side_image = side_image elif index > int(n_frames_approx - frame_buffer * 2): if (side_image == last_side_image).all(): n_frames = index break top_image = img_as_ubyte(top_video.reader.read_frame()) if index == int(n_frames_approx - frame_buffer * 2): last_top_image = top_image elif index > int(n_frames_approx - frame_buffer * 2): if (top_image == last_top_image).all(): n_frames = index break if '3d' in fnames: image_3d = img_as_ubyte(video_3d.reader.read_frame()) if index == int(n_frames_approx - frame_buffer * 2): last_3d_image = image_3d elif index > int(n_frames_approx - frame_buffer * 2): if (image_3d == last_3d_image).all(): n_frames = index break # Resize frame front_factor = np.min([out_size[0] / 2.0 / front_image.shape[1], out_size[1] / 2.0 / front_image.shape[0]]) front_image = cv2.resize(front_image, None, fx=front_factor, fy=front_factor) side_factor = np.min([out_size[0] / 2.0 / side_image.shape[1], out_size[1] / 2.0 / side_image.shape[0]]) side_image = cv2.resize(side_image, None, fx=side_factor, fy=side_factor) top_factor = np.min([out_size[0] / 2.0 / top_image.shape[1], out_size[1] / 2.0 / top_image.shape[0]]) top_image = cv2.resize(top_image, None, fx=top_factor, fy=top_factor) if '3d' in fnames: factor_3d = np.min([out_size[0] / 2.0 / image_3d.shape[1], out_size[1] / 2.0 / image_3d.shape[0]]) image_3d = cv2.resize(image_3d, None, fx=factor_3d, fy=factor_3d) # Initialize new frame and add front image to it new_frame = np.zeros((out_size[1], out_size[0], 3)) extra_x_space = out_size[0] / 2 - front_image.shape[1] extra_y_space = out_size[1] / 2 - front_image.shape[0] start_x=int(out_size[1] / 2 + extra_y_space / 2) end_x=int(out_size[1] / 2 + front_image.shape[0] + extra_y_space / 2) start_y=int(0 + extra_x_space / 2) end_y=int(front_image.shape[1] + extra_x_space / 2) new_frame[start_x:end_x,start_y:end_y, :] = front_image # Add side image to frame extra_x_space = out_size[0] / 2 - side_image.shape[1] extra_y_space = out_size[1] / 2 - side_image.shape[0] start_x=int(0 + extra_y_space / 2) end_x=int(side_image.shape[0] + extra_y_space / 2) start_y=int(out_size[0] / 2 + extra_x_space / 2) end_y=int(out_size[0] / 2 + side_image.shape[1] + extra_x_space / 2) 
new_frame[start_x:end_x,start_y:end_y,:] = side_image # Add top image to frame extra_x_space = out_size[0] / 2 - top_image.shape[1] extra_y_space = out_size[1] / 2 - top_image.shape[0] start_x=int(0 + extra_y_space / 2) end_x=int(top_image.shape[0] + extra_y_space / 2) start_y=int(0 + extra_x_space / 2) end_y=int(top_image.shape[1] + extra_x_space / 2) new_frame[start_x:end_x,start_y:end_y, :] = top_image if '3d' in fnames: extra_x_space = out_size[0] / 2 - image_3d.shape[1] extra_y_space = out_size[1] / 2 - image_3d.shape[0] start_x = int(out_size[1] / 2 + extra_y_space / 2) end_x = int(out_size[1] / 2 + image_3d.shape[0] + extra_y_space / 2) start_y = int(out_size[0] / 2 + extra_x_space / 2) end_y = int(out_size[0] / 2 + image_3d.shape[1] + extra_x_space / 2) new_frame[start_x:end_x, start_y:end_y, :] = image_3d new_clip.save_frame(np.uint8(new_frame)) front_video.close() del front_video side_video.close() del side_video top_video.close() del top_video if '3d' in fnames: video_3d.close() del video_3d new_clip.close()
input_delay = 0.35  # ++ if the music comes early, -- if it comes late
delay = 0.36 - input_delay  # by how many seconds the audio is shifted
position = 0.36
output_video_path = 'test.mp4'
clips = []
for i in range(pocet_klipu):
    delka = beats[i] + after_delay - position
    zacatek = shots[i] + after_delay - delka
    shot = zacatek + delka - after_delay
    print(delka)
    clips.append(VideoFileClip(paths[i]).subclip(zacatek, zacatek + delka))
    position += delka
audioclip = AudioFileClip(song_path).subclip(delay, beats[pocet_klipu - 1] + after_delay)
final_clip = concatenate_videoclips(clips).set_audio(audioclip)
final_clip.write_videofile(output_video_path, audio_codec='aac')
# final_clip.preview()
from moviepy.video.io.VideoFileClip import VideoFileClip

in_vid_path = "C:\\Users\\Brandon\\Documents\\Personal_Projects\\vid_m_comp_big_data\\current_data\\downloaded_clips\\post_0010.mp4"
out_vid_path = "C:\\Users\\Brandon\\Documents\\Personal_Projects\\vid_m_comp_big_data\\vids\\post_0010__trimmed.mp4"

# time_tup = (10, 17)
# input_video_path = 'myPath/vid1.mp4'
# output_video_path = 'myPath/output/vid1.mp4'

with VideoFileClip(in_vid_path) as video:
    new = video.subclip(10, 17)
    new.write_videofile(out_vid_path, audio_codec='aac')

# trim_vid(in_vid_path, out_vid_path, time_tup)
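# The commented-out call above refers to a `trim_vid` helper that is not shown
# in this snippet. A hedged sketch of such a helper, assuming the time tuple is
# (start_seconds, end_seconds):
def trim_vid(in_path, out_path, time_tup):
    start, end = time_tup
    with VideoFileClip(in_path) as video:
        video.subclip(start, end).write_videofile(out_path, audio_codec='aac')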
def convert(self, video_file, swap_model = False, duration = None, start_time = None, use_gan = False, face_filter = False, photos = True, crop_x = None, width = None, side_by_side = False): # Magic incantation to not have tensorflow blow up with an out of memory error. import tensorflow as tf import keras.backend.tensorflow_backend as K config = tf.ConfigProto() config.gpu_options.allow_growth = True config.gpu_options.visible_device_list="0" K.set_session(tf.Session(config=config)) # Load model model_name = "Original" converter_name = "Masked" if use_gan: model_name = "GAN" converter_name = "GAN" model = PluginLoader.get_model(model_name)(Path(self._model_path(use_gan))) if not model.load(swap_model): print('model Not Found! A valid model must be provided to continue!') exit(1) # Load converter converter = PluginLoader.get_converter(converter_name) converter = converter(model.converter(False), blur_size=8, seamless_clone=True, mask_type="facehullandrect", erosion_kernel_size=None, smooth_mask=True, avg_color_adjust=True) # Load face filter filter_person = self._person_a if swap_model: filter_person = self._person_b filter = FaceFilter(self._people[filter_person]['faces']) # Define conversion method per frame def _convert_frame(frame, convert_colors = True): if convert_colors: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # Swap RGB to BGR to work with OpenCV for face in detect_faces(frame, "cnn"): if (not face_filter) or (face_filter and filter.check(face)): frame = converter.patch_image(frame, face) frame = frame.astype(numpy.float32) if convert_colors: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # Swap RGB to BGR to work with OpenCV return frame def _convert_helper(get_frame, t): return _convert_frame(get_frame(t)) media_path = self._video_path({ 'name' : video_file }) if not photos: # Process video; start loading the video clip video = VideoFileClip(media_path) # If a duration is set, trim clip if duration: video = video.subclip(start_time, start_time + duration) # Resize clip before processing if width: video = video.resize(width = width) # Crop clip if desired if crop_x: video = video.fx(crop, x2 = video.w / 2) # Kick off convert frames for each frame new_video = video.fl(_convert_helper) # Stack clips side by side if side_by_side: def add_caption(caption, clip): text = (TextClip(caption, font='Amiri-regular', color='white', fontsize=80). margin(40). set_duration(clip.duration). on_color(color=(0,0,0), col_opacity=0.6)) return CompositeVideoClip([clip, text]) video = add_caption("Original", video) new_video = add_caption("Swapped", new_video) final_video = clips_array([[video], [new_video]]) else: final_video = new_video # Resize clip after processing #final_video = final_video.resize(width = (480 * 2)) # Write video output_path = os.path.join(self.OUTPUT_PATH, video_file) final_video.write_videofile(output_path, rewrite_audio = True) # Clean up del video del new_video del final_video else: # Process a directory of photos for face_file in os.listdir(media_path): face_path = os.path.join(media_path, face_file) image = cv2.imread(face_path) image = _convert_frame(image, convert_colors = False) cv2.imwrite(os.path.join(self.OUTPUT_PATH, face_file), image)
# CinEditorML model Code
# Result Video Name: scenario2
from moviepy.editor import *
import numpy as np
from moviepy.video.tools.segmenting import findObjects
from moviepy.video.io.VideoFileClip import VideoFileClip
import os

### Video
clip1 = VideoFileClip('videoTest3.mp4')

### Text
_begin = TextClip("beginningText", fontsize=70, color='white')
_begin = _begin.set_position('center').set_duration(10)
begin = CompositeVideoClip([_begin], size=[1920, 1080])

### Specific Video Part
clip1a = clip1.subclip('00:23', '01:47')
clip1a.write_videofile("clip1a.mp4", fps=30)

### Specific Video Part
clip1b = clip1.subclip('02:01', '02:21')
clip1b.write_videofile("clip1b.mp4", fps=30)

### Text
def download_hacs(root, annotations, checksums, workers=8, debug=False): logging.info(f'Downloading HACS videos.') videos = [ v for v in annotations['videos'] if v['metadata']['dataset'] == 'HACS' ] if debug: # Take 5 of each type of video. _scene_videos = [ v for v in videos if v['metadata']['scene'] is not None ] _noscene_videos = [v for v in videos if v['metadata']['scene'] is None] videos = _scene_videos[:5] + _noscene_videos[:5] videos_dir = root / 'videos' frames_dir = root / 'frames' tmp_dir = dir_path(root / 'cache' / 'hacs_videos') missing_dir = Path(root / 'hacs_missing') # List of (video, video_path, frame_path) videos_to_dump = [] unavailable_videos = [] for video in tqdm(videos, desc='Downloading HACS'): video_path = file_path(videos_dir / f"{video['name']}.mp4") frame_output = dir_path(frames_dir / video['name']) if are_tao_frames_dumped(frame_output, checksums[video['name']], warn=False): continue if not video_path.exists(): ytid = video['metadata']['youtube_id'] full_video = tmp_dir / f"v_{ytid}.mp4" missing_downloaded = missing_dir / f"{ytid}.mp4" if missing_downloaded.exists(): logging.info( f'Found video downloaded by user at {missing_downloaded}.') shutil.copy2(missing_downloaded, full_video) if not full_video.exists(): url = 'http://youtu.be/' + ytid try: vid_bytes = download_to_bytes(url) except BaseException: vid_bytes = None if isinstance(vid_bytes, int) or vid_bytes is None: unavailable_videos.append( (ytid, video['metadata']['action'])) continue else: vid_bytes = vid_bytes.getvalue() if len(vid_bytes) == 0: unavailable_videos.append( (ytid, video['metadata']['action'])) continue with open(full_video, 'wb') as f: f.write(vid_bytes) if video['metadata']['scene'] is not None: shot_endpoints = video['metadata']['scene'].rsplit('_', 1)[1] start, end = shot_endpoints.split('-') clip = VideoFileClip(str(full_video)) subclip = clip.subclip( int(start) / clip.fps, int(end) / clip.fps) subclip.write_videofile(str(video_path), audio=False, verbose=False, progress_bar=False) else: shutil.copy2(full_video, video_path) videos_to_dump.append((video['name'], video_path, frame_output)) dump_tao_frames([x[1] for x in videos_to_dump], [x[2] for x in videos_to_dump], workers) for video, video_path, frame_dir in videos_to_dump: remove_non_tao_frames(frame_dir, set(checksums[video].keys())) assert are_tao_frames_dumped(frame_dir, checksums[video]), ( f'Not all TAO frames for {video} were extracted.') if unavailable_videos: missing_path = file_path(missing_dir / 'missing.txt') logging.error('\n'.join([ '', f'{len(unavailable_videos)} video(s) could not be downloaded; ' 'please request them from the HACS website by uploading ', f'\t{missing_path}', 'to the following form', '\thttps://goo.gl/forms/0STStcLndI32oke22', 'See the following README for details:', '\thttps://github.com/hangzhaomit/HACS-dataset#request-testing-videos-and-missing-videos-new', ])) with open(missing_path, 'w') as f: csv.writer(f).writerows(unavailable_videos) if len(unavailable_videos) > 20: logging.error( fill('NOTE: Over 20 HACS videos were unavailable. This may mean ' 'that YouTube is rate-limiting your download; please try ' 'running this script again after a few hours, or on a ' 'different machine.'))
def __init__(self, *video_paths, audio=None):
    self.audio = audio
    self.videos = [VideoFileClip(path) for path in video_paths]
def process_image(base_img):
    global BASE_IMG, CANNY_IMG
    BASE_IMG = base_img
    ysize = base_img.shape[0]
    xsize = base_img.shape[1]
    image = to_hsv(base_img)
    image = gaussian_blur(image, 3)
    image = filter_color(image)
    image = canny(image, 30, 130)
    CANNY_IMG = image
    image = region_of_interest(
        image,
        np.array([[(40, ysize),
                   ((xsize / 2) - 20, ysize / 2 + 40),
                   ((xsize / 2) + 20, ysize / 2 + 40),
                   (xsize - 40, ysize)]], dtype=np.int32))
    image = hough_lines(image, 1, np.pi / 90, 10, 15, 10)
    # return image
    return weighted_img(image, base_img, β=250.)


src_img = (mpimg.imread('./test_images/solidYellowLeft.jpg') * 255).astype('uint8')
src_img = process_image(src_img)
plt.imshow(src_img, cmap='hsv_r')
plt.show()

white_output = 'challengeDone.mp4'
clip1 = VideoFileClip('challenge.mp4')  # .subclip(14, 16)
white_clip = clip1.fl_image(process_image)  # NOTE: this function expects color images!!
white_clip.write_videofile(white_output, audio=False)
def _load_clip(self):
    audio_fps = AudioStim.get_sampling_rate(self.filename)
    self.clip = VideoFileClip(self.filename, audio_fps=audio_fps)
def test(cfg): """ Perform multi-view testing/feature extraction on the pretrained video model. Args: cfg (CfgNode): configs. Details can be found in slowfast/config/defaults.py """ # Set random seed from configs. np.random.seed(cfg.RNG_SEED) torch.manual_seed(cfg.RNG_SEED) # Setup logging format. logging.setup_logging(cfg.OUTPUT_DIR) # Print config. logger.info("Test with config:") logger.info(cfg) # Build the video model and print model statistics. model = build_model(cfg) if du.is_master_proc() and cfg.LOG_MODEL_INFO: misc.log_model_info(model, cfg, use_train_input=False) cu.load_test_checkpoint(cfg, model) vid_root = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, cfg.DATA.PATH_PREFIX) videos_list_file = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, "vid_list.csv") print("Loading Video List ...") with open(videos_list_file) as f: videos = sorted( [x.strip() for x in f.readlines() if len(x.strip()) > 0]) print("Done") print("----------------------------------------------------------") if cfg.DATA.READ_VID_FILE: rejected_vids = [] print("{} videos to be processed...".format(len(videos))) print("----------------------------------------------------------") start_time = time.time() for vid_no, vid in enumerate(videos): # Create video testing loaders. path_to_vid = os.path.join(vid_root, os.path.split(vid)[0]) vid_id = os.path.split(vid)[1] if cfg.DATA.READ_VID_FILE: try: _ = VideoFileClip( os.path.join(path_to_vid, vid_id) + cfg.DATA.VID_FILE_EXT, audio=False, fps_source="fps", ) except Exception as e: print("{}. {} cannot be read with error {}".format( vid_no, vid, e)) print( "----------------------------------------------------------" ) rejected_vids.append(vid) continue out_path = os.path.join(cfg.OUTPUT_DIR, os.path.split(vid)[0]) out_file = vid_id.split(".")[0] + "_{}.npy".format(cfg.DATA.NUM_FRAMES) if os.path.exists(os.path.join(out_path, out_file)): print("{}. {} already exists".format(vid_no, out_file)) print("----------------------------------------------------------") continue print("{}. Processing {}...".format(vid_no, vid)) dataset = VideoSet(cfg, path_to_vid, vid_id, read_vid_file=cfg.DATA.READ_VID_FILE) test_loader = torch.utils.data.DataLoader( dataset, batch_size=cfg.TEST.BATCH_SIZE, shuffle=False, sampler=None, num_workers=cfg.DATA_LOADER.NUM_WORKERS, pin_memory=cfg.DATA_LOADER.PIN_MEMORY, drop_last=False, ) # Perform multi-view test on the entire dataset. feat_arr = perform_inference(test_loader, model, cfg) os.makedirs(out_path, exist_ok=True) np.save(os.path.join(out_path, out_file), feat_arr) print("Done.") print("----------------------------------------------------------") if cfg.DATA.READ_VID_FILE: print("Rejected Videos: {}".format(rejected_vids)) end_time = time.time() hours, minutes, seconds = calculate_time_taken(start_time, end_time) print("Time taken: {} hour(s), {} minute(s) and {} second(s)".format( hours, minutes, seconds)) print("----------------------------------------------------------")
def makeHighlight(self): filename = self.f_label.text() cap = cv2.VideoCapture(filename) video_for_cut = VideoFileClip(filename) fps = cap.get(cv2.CAP_PROP_FPS) #sys.path.append("..") MODEL_NAME = 'soccer_highlight_goal2' # Path to frozen detection graph. This is the actual model that is used for the object detection. PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb' # List of the strings that is used to add correct label for each box. PATH_TO_LABELS = os.path.join('training', 'object-detection.pbtxt') detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.io.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True) def load_image_into_numpy_array(image): (im_width, im_height) = image.size return np.array(image.getdata()).reshape( (im_height, im_width, 3)).astype(np.uint8) count = 1 cut_count = 0 hightlight = [] cut = [] with detection_graph.as_default(): with tf.compat.v1.Session(graph=detection_graph) as sess: while True: ret, image_np = cap.read() # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(image_np, axis=0) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') # Each box represents a part of the image where a particular object was detected. boxes = detection_graph.get_tensor_by_name('detection_boxes:0') # Each score represent how level of confidence for each of the objects. # Score is shown on the result image, together with the class label. scores = detection_graph.get_tensor_by_name('detection_scores:0') classes = detection_graph.get_tensor_by_name('detection_classes:0') num_detections = detection_graph.get_tensor_by_name('num_detections:0') # Actual detection. try: (boxes, scores, classes, num_detections) = sess.run( [boxes, scores, classes, num_detections], feed_dict={image_tensor: image_np_expanded}) # 동영상 끝나면 highlightui로 넘어가게 된다. except TypeError: self.window = Ui_HighlightWindow() self.window.show() break # Visualization of the results of a detection. vis_util.visualize_boxes_and_labels_on_image_array( image_np, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, line_thickness=8) if (int(cap.get(1)) % 8 == 0): title = "%d.jpg" % count count += 1 # cv2.imshow('object detection', cv2.resize(image_np, (800, 600))) if (float(100 * scores[0][0]) > 99.7): print(title) hightlight.append(count) else: if (count - 1 in hightlight and count + 1 not in hightlight): cut.append(8 * (count - 4)) print(count - 1) if (len(cut) > 1): duration1 = cut[0] / fps duration2 = cut[1] / fps length = duration2 - duration1 print(length) if (length > 60 or length <= 2): cut[0] = cut[1] del (cut[1]) else: start_hour = (duration1 / 3600) start_min = ((duration1 % 3600) / 60) start_sec = duration1 % 60 end_hour = (duration2 / 3600) end_min = ((duration2 % 3600) / 60) end_sec = duration2 % 60 tmp_video = video_for_cut.subclip(duration1, duration2) tmp_title = "./videos/%d+%d+%d~%d+%d+%d.mp4" % ( start_hour, start_min, start_sec, end_hour, end_min, end_sec) cut_count += 1 tmp_video.write_videofile(tmp_title, codec='libx264') cut = []
        )
    )
    image = hough_lines(image, 1, np.pi / 90, 100, 15, 10)
    # print(BASE_IMG.shape, ' - ', CANNY_IMG.shape, ' - ', HOUGH_IMG.shape)
    return weightImage(image, base_img, β=250.)


# image = mimg.imread('test3.jpg')
# new_img = processImage(image)
# new_file = 'test3Processed.jpg'
# # mplt.imshow(new_img, cmap='gray')
# # mplt.show()
# mimg.imsave(new_file, new_img)

inputfile = 'challenge2_short'
outputfile = inputfile + '_outputCOMBO'

clip1 = VideoFileClip(inputfile + '.mp4')
white_clip = clip1.fl_image(processImage)  # NOTE: this function expects color images!!
white_clip.write_videofile(outputfile + '.mp4', audio=False)
# Download youtube video
###########################################################################
if not args.already_downloaded:
    remove_video_file(args.tmp_filepath)
    subprocess.check_call(
        'youtube-dl --no-continue --output "{}" "{}"'.format(
            args.tmp_filepath,
            args.youtube_url,
        ),
        shell=True,
    )

clip = VideoFileClip(
    args.tmp_filepath,
    audio=False,
)
clip_frame0 = clip.get_frame(0)
clip_resolution = (len(clip_frame0), len(clip_frame0[0]))
if clip_resolution != TARGET_RESOLUTION:
    clip.reader.close()
    clip = VideoFileClip(
        args.tmp_filepath,
        audio=False,
        target_resolution=TARGET_RESOLUTION,
        resize_algorithm='fast_bilinear',
    )

high_res_clip = VideoFileClip(
    args.tmp_filepath,
    audio=False,
    resize_algorithm='fast_bilinear',
def process_video_file(filepath, output_dir=None, suffix=None, audio_model=None, image_model=None, input_repr="mel256", content_type="music", audio_embedding_size=6144, audio_center=True, audio_hop_size=0.1, image_embedding_size=8192, audio_batch_size=32, image_batch_size=32, overwrite=False, verbose=True): """ Computes and saves L3 audio and video frame embeddings for a given video file Note that image embeddings are computed for every frame of the video. Also note that embeddings for the audio and images are not temporally aligned. Please refer to the timestamps in the output files for the corresponding timestamps for each set of embeddings. Parameters ---------- filepath : str or list[str] Path or list of paths to video file(s) to be processed. output_dir : str or None Path to directory for saving output files. If None, output files will be saved to the directory containing the input file. suffix : str or None String to be appended to the output filename, i.e. <base filename>_<modality>_<suffix>.npz. If None, then no suffix will be added, i.e. <base filename>_<modality>.npz. audio_model : keras.models.Model or None Loaded audio model object. If a model is provided, then `input_repr`, `content_type`, and `embedding_size` will be ignored. If None is provided, the model will be loaded using the provided values of `input_repr`, `content_type` and `embedding_size`. image_model : keras.models.Model or None Loaded audio model object. If a model is provided, then `input_repr`, `content_type`, and `embedding_size` will be ignored. If None is provided, the model will be loaded using the provided values of `input_repr`, `content_type` and `embedding_size`. input_repr : "linear", "mel128", or "mel256" Spectrogram representation used for audio model. Ignored if `model` is a valid Keras model. content_type : "music" or "env" Type of content used to train the embedding model. Ignored if `model` is a valid Keras model. audio_embedding_size : 6144 or 512 Audio embedding dimensionality. Ignored if `model` is a valid Keras model. audio_center : boolean If True, pads beginning of audio signal so timestamps correspond to center of window. audio_hop_size : float Hop size in seconds. image_embedding_size : 8192 or 512 Video frame embedding dimensionality. Ignored if `model` is a valid Keras model. audio_batch_size : int Batch size used for input to audio embedding model image_batch_size : int Batch size used for input to image embedding model overwrite : bool If True, overwrites existing output files verbose : bool If True, prints verbose messages. Returns ------- """ if isinstance(filepath, six.string_types): filepath_list = [filepath] elif isinstance(filepath, list): filepath_list = filepath else: err_msg = 'filepath should be type str or list[str], but got {}.' 
raise OpenL3Error(err_msg.format(filepath)) # Load models if not audio_model: audio_model = load_audio_embedding_model(input_repr, content_type, audio_embedding_size) if not image_model: image_model = load_image_embedding_model(input_repr, content_type, image_embedding_size) audio_suffix, image_suffix = "audio", "image" if suffix: audio_suffix += "_" + suffix image_suffix += "_" + suffix audio_list = [] sr_list = [] audio_batch_filepath_list = [] total_audio_batch_size = 0 image_list = [] frame_rate_list = [] image_batch_filepath_list = [] num_files = len(filepath_list) for file_idx, filepath in enumerate(filepath_list): if not os.path.exists(filepath): raise OpenL3Error('File "{}" could not be found.'.format(filepath)) if verbose: print("openl3: Processing {} ({}/{})".format( filepath, file_idx + 1, num_files)) # Skip if overwriting isn't enabled and output file exists audio_output_path = get_output_path(filepath, audio_suffix + ".npz", output_dir=output_dir) image_output_path = get_output_path(filepath, image_suffix + ".npz", output_dir=output_dir) skip_audio = os.path.exists(audio_output_path) and not overwrite skip_image = os.path.exists(image_output_path) and not overwrite if skip_audio and skip_image: err_msg = "openl3: {} and {} exist, skipping." print(err_msg.format(audio_output_path, image_output_path)) continue try: clip = VideoFileClip(filepath, target_resolution=(256, 256), audio_fps=TARGET_SR) audio = clip.audio.to_soundarray(fps=TARGET_SR) images = np.array([frame for frame in clip.iter_frames()]) except Exception: err_msg = 'Could not open file "{}":\n{}' raise OpenL3Error(err_msg.format(filepath, traceback.format_exc())) if not skip_audio: audio_list.append(audio) sr_list.append(TARGET_SR) audio_batch_filepath_list.append(filepath) audio_len = audio.shape[0] audio_hop_length = int(audio_hop_size * TARGET_SR) num_windows = 1 + max( ceil((audio_len - TARGET_SR) / float(audio_hop_length)), 0) total_audio_batch_size += num_windows else: err_msg = "openl3: {} exists, skipping audio embedding extraction." print(err_msg.format(audio_output_path)) if not skip_image: image_list.append(images) frame_rate_list.append(int(clip.fps)) image_batch_filepath_list.append(filepath) else: err_msg = "openl3: {} exists, skipping image embedding extraction." 
print(err_msg.format(image_output_path)) if (total_audio_batch_size >= audio_batch_size or file_idx == (num_files - 1)) and len(audio_list) > 0: embedding_list, ts_list \ = get_audio_embedding(audio_list, sr_list, model=audio_model, input_repr=input_repr, content_type=content_type, embedding_size=audio_embedding_size, center=audio_center, hop_size=audio_hop_size, batch_size=audio_batch_size, verbose=verbose) for fpath, embedding, ts in zip(audio_batch_filepath_list, embedding_list, ts_list): output_path = get_output_path(fpath, audio_suffix + ".npz", output_dir=output_dir) np.savez(output_path, embedding=embedding, timestamps=ts) assert os.path.exists(output_path) if verbose: print("openl3: Saved {}".format(output_path)) audio_list = [] sr_list = [] audio_batch_filepath_list = [] total_audio_batch_size = 0 if (len(image_list) >= image_batch_size or file_idx == (num_files - 1)) and len(image_list) > 0: embedding_list, ts_list \ = get_image_embedding(image_list, frame_rate_list, model=image_model, input_repr=input_repr, content_type=content_type, embedding_size=image_embedding_size, batch_size=image_batch_size, verbose=verbose) for fpath, embedding, ts in zip(image_batch_filepath_list, embedding_list, ts_list): output_path = get_output_path(fpath, image_suffix + ".npz", output_dir=output_dir) np.savez(output_path, embedding=embedding, timestamps=ts) assert os.path.exists(output_path) if verbose: print("openl3: Saved {}".format(output_path)) image_list = [] frame_rate_list = [] image_batch_filepath_list = []
def test_write_gif_ImageMagick():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").subclip(0.2, 0.5)
    location = os.path.join(TMP_DIR, "imagemagick_gif.gif")
    clip.write_gif(location, program="ImageMagick")
    close_all_clips(locals())
def download_ava(root, annotations, checksums, workers=8, movies_dir=None): if movies_dir is None: movies_dir = root / 'cache' / 'ava_movies' movies_dir.mkdir(exist_ok=True, parents=True) logging.info(f'Downloading AVA videos.') videos = [ v for v in annotations['videos'] if v['metadata']['dataset'] == 'AVA' ] movie_clips = defaultdict(list) for v in videos: movie_clips[v['metadata']['movie']].append(v) movie_info = ava_load_meta() videos_dir = root / 'videos' frames_root = root / 'frames' for movie_stem, clips in tqdm(movie_clips.items(), desc='Processing AVA movies'): movie = f"{movie_stem}.{movie_info[movie_stem]['ext']}" # List of (clip, output clip path, output frames directory) for clips # whose frames have not already been extracted. to_process = [] for clip in clips: name = clip['name'] output_clip = file_path(videos_dir / f"{name}.mp4") output_frames = dir_path(frames_root / name) if are_tao_frames_dumped(output_frames, checksums[name], warn=False): logging.debug(f'Skipping extracted clip: {name}') continue to_process.append((clip, output_clip, output_frames)) # Download movie if necessary. if all(x[1].exists() for x in to_process): movie_vfc = None else: if movies_dir and (movies_dir / movie).exists(): downloaded_movie_this_run = False movie_path = movies_dir / movie logging.debug(f'Found AVA movie {movie} at {movie_path}') else: downloaded_movie_this_run = True movie_path = movies_dir / movie if not movie_path.exists(): logging.debug(f'Downloading AVA movie: {movie}.') url = ( f"{AVA_URL}/{movie_info[movie_stem]['split']}/{movie}") urllib.request.urlretrieve(url, movie_path) movie_vfc = VideoFileClip(str(movie_path)) for clip_info, clip_path, frames_dir in tqdm(to_process, desc='Extracting shots', leave=False): if clip_path.exists(): continue shot_endpoints = clip_info['metadata']['scene'].rsplit('_', 1)[1] start, end = shot_endpoints.split('-') subclip = movie_vfc.subclip( int(start) / movie_vfc.fps, int(end) / movie_vfc.fps) subclip.write_videofile(str(clip_path), audio=False, verbose=False, progress_bar=False) close_clip(subclip) if movie_vfc: close_clip(movie_vfc) if downloaded_movie_this_run: movie_path.unlink() logging.debug( f'AVA: Dumping TAO frames:\n{[x[1:] for x in to_process]}') dump_tao_frames([x[1] for x in to_process], [x[2] for x in to_process], workers) for clip, clip_path, frame_dir in to_process: if not are_tao_frames_dumped(frame_dir, checksums[clip['name']]): raise ValueError( f'Not all TAO frames for {clip["name"]} were extracted. ' f'Try deleting the clip at {clip_path} and running this ' f'script again.') remove_non_tao_frames(frame_dir, set(checksums[clip['name']].keys())) assert are_tao_frames_dumped(frame_dir, checksums[clip['name']]), ( f'ERROR: TAO frames were dumped properly for {clip["name"]}, ' f'but were deleted by `remove_non_tao_frames`! This is a bug, ' f'please report it.')
def test_save_frame():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm")
    location = os.path.join(TMP_DIR, "save_frame.png")
    clip.save_frame(location, t=0.5)
    assert os.path.isfile(location)
    close_all_clips(locals())
def test_cuts1():
    clip = VideoFileClip("media/big_buck_bunny_432_433.webm").resize(0.2)
    assert cuts.find_video_period(clip) == pytest.approx(0.966666666667, 0.0001)
    close_all_clips(locals())
    TxtFile = pureName + ".txt"
    TxtFile = os.path.join(MdPath, TxtFile)
    if os.path.exists(TxtFile):
        TxtPath = MdPath
    else:
        print("No srt or txt file exists")
        final_out(21)
else:
    if SrtPath is None or len(SrtPath) == 0:
        SrtPath = os.path.dirname(SrtFile)

print("MP4 {}/{} TXT {}/{} SRT {}/{} ".format(MdPath, MdFile, TxtFile, TxtPath, SrtFile, SrtPath))
print("Reading length of the video")
videoclip = VideoFileClip(MdFile)
VideoLen = videoclip.duration

if TxtFile:
    read_text_srt(TxtFile)
    convert_txt_to_srt(TxtFile, SrtFile)
elif SrtFile:
    print("Load SrtFile {} directly".format(SrtFile))
    load_srt_file(SrtFile)
else:
    print("No text or srt file")
    final_out(23)

if num_itm:
    print("Convert txt to speech from Baidu")
    baidu_client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    # time.sleep(8)
def load_clip(self, filename: str):
    audio_fps = self.get_audio_sampling_rate(filename)
    self.clip = VideoFileClip(filename, audio_fps=audio_fps)