def download_audio(url, filename_out=None, t1=None, t2=None, verbose=False):
    """Download a YouTube video, extract its audio and save it as a .wav file.

    Parameters
    ----------
    url : str
        YouTube video URL.
    filename_out : str, optional
        Output base name (no extension). Defaults to the video title
        reported by ``YoutubeApi().search_video(url)``.
    t1, t2 : optional
        Start/end times of an optional cut; both or neither must be given.
    verbose : bool
        Print progress messages.

    Returns
    -------
    str
        Name of the written .wav file.

    Raises
    ------
    AttributeError
        If only one of ``t1``/``t2`` is provided.
    """
    from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_audio
    from os import remove

    # External tools expected on the system:
    # sudo apt-get install mplayer
    # sudo apt-get install lame
    if filename_out is None:
        filename_out = YoutubeApi().search_video(url)['title']
    download_video(url, filename_out='video_temp', verbose=verbose)
    # Exactly one endpoint given is an error: a cut needs both ends.
    if (t1 is None) != (t2 is None):
        exc = 'You must provide both temps: start and end'
        raise AttributeError(exc)
    if t1 is not None:
        # Both endpoints given: cut first, then extract audio from the cut.
        cut('video_temp.mp4', t1, t2, 'video_temp_edit.mp4', verbose=verbose)
        if verbose:
            print('Converting into .wav\n Please, wait...')
        ffmpeg_extract_audio('video_temp_edit.mp4', filename_out + '.wav')
        remove('video_temp_edit.mp4')
    else:
        # No cut requested: extract audio from the full download.
        if verbose:
            print('Converting into .wav\n Please, wait...')
        ffmpeg_extract_audio('video_temp.mp4', filename_out + '.wav')
        remove('video_temp.mp4')
    if verbose:
        print('Video converted correctly')
    return filename_out + '.wav'
def chunkAudio(self, v):
    """Split the audio of video *v* into fixed-length chunks and compute
    a smoothed ("spread") volume for each chunk.

    Side effects: creates and deletes a temporary mp3 under the working
    "chunks" directory, and (re)populates ``self.tmpChunks`` sorted by
    (chunk index fields) with a ``.sv`` spread-volume attribute set.
    """
    print(f"chunkAudio session {randomString(4)}")
    pvc = v.getFullVideo()
    # create tandem mp3 audio
    af = self.workD.append("chunks").append(f"chunk_{randomString(7)}.mp3")
    print(f"created temporary file {af.path()}")
    ffmpeg_extract_audio(v.aPath(), af.aPath())  # , ffmpeg_params=["-preset","fast"])
    a = AudioSegment.from_mp3(af.aPath())
    print(a)
    packets = make_chunks(a, self.chulenms)
    print(f"dividing clip")
    # make 5 minute segments to process simultaneously
    n = int(pvc.duration // self.chuLenS)
    import concurrent.futures
    subclips = [None] * n
    executor = concurrent.futures.ProcessPoolExecutor(61)
    futures = [
        executor.submit(self.chunkAudio_createChunk, i, pvc, subclips)
        for i in range(n)
    ]
    # BUGFIX: wait for the chunk-creation batch to finish before reading
    # subclips below; previously these futures were never awaited.
    concurrent.futures.wait(futures)
    print(f" breakpoint 1")
    print(f"preparing jobs for list of size {len(subclips)}")
    self.tmpChunks = list()
    executor = concurrent.futures.ProcessPoolExecutor(61)
    futures = [
        executor.submit(self.appendChunks, subclips[i], i, self.tmpChunks)
        for i in range(len(subclips))
    ]
    # run code in the meantime
    concurrent.futures.wait(futures)
    print(f"aggregated all chunks")
    # order in case concurrent was out of order
    self.tmpChunks = sorted(self.tmpChunks,
                            key=lambda element: (element[0], element[1]))
    print(f"organized all chunks")
    for i in range(len(self.tmpChunks)):
        # Average the volume over a window of +/- spreadCalc neighbours,
        # clamped to the list bounds.
        i1 = max(0, i - spreadCalc)
        i2 = min(len(self.tmpChunks), i + spreadCalc)
        # BUGFIX: divide the *sum* by the window size; the old code divided
        # the list itself by an int (TypeError at runtime).
        self.tmpChunks[i].sv = (
            sum(map(lambda x: x.volume, self.tmpChunks[i1:i2]))
            / max(1, i2 - i1))
    print(f"spread volumes calculated")
    print(self.tmpChunks)
    af.delete()
    print(f"destroyed temporary file {af.path()}")
def makeAudio(self, i, vidList, audioFileList, UUID):
    """Extract the audio track of ``vidList[i]`` into a fresh temporary mp3
    and append its file handle to *audioFileList*.

    *UUID* is only used to tag the log output of this call.
    """
    print(f"({UUID}) makeAudio()")
    # Allocate a uniquely-named mp3 inside the working "chunks" directory.
    chunk_name = f"chunk_{randomString(12)}.mp3"
    audio_file = self.workD.append("chunks").append(chunk_name)
    print(f"created temporary file {audio_file.path()}")
    # Strip the audio from the source video into the temp file and record it.
    ffmpeg_extract_audio(vidList[i].aPath(), audio_file.aPath())
    audioFileList.append(audio_file)
    print(f" end ({UUID}) makeAudio()")
def write_remix_video(remix_filenames, video_filename):
    """Concatenate the remix clips, then re-attach the original video's audio.

    Writes ``REMIX_DIRECTORY + 'remix-' + video_filename`` and removes the
    intermediate no-audio render afterwards.
    """
    # Load every remix segment and join them end to end.
    clips = []
    for name in remix_filenames:
        clips.append(VideoFileClip(name))
    joined = concatenate_videoclips(clips)

    final_path = REMIX_DIRECTORY + 'remix-' + video_filename
    silent_path = final_path.replace('.mp4', '-noaudio.mp4')
    joined.write_videofile(silent_path)

    # Pull the audio track out of the source video ...
    audio_path = AUDIO_DIRECTORY + video_filename.replace('mp4', 'mp3')
    ffmpeg_extract_audio(VIDEO_DIRECTORY + video_filename, audio_path)
    # ... and mux it onto the silent remix, then drop the intermediate file.
    ffmpeg_merge_video_audio(silent_path, audio_path, final_path)
    os.remove(silent_path)
# BUGFIX: os is used below (os.path.join / os.getcwd) but was never imported.
import os

import imageio
imageio.plugins.ffmpeg.download()

# MoviePy Audio Extractor
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_audio
# Audio Reader for Numpy Arrays
from scipy.io.wavfile import read
# MatPlotLib to try and plot the array
import matplotlib.pyplot as plt
# Subclip Creator
from moviepy.video.VideoClip import VideoClip as clip

path = os.path.join(os.getcwd(), 'files')
print(path)

# Extract the soundtrack of test.mp4 and load it back as a numpy array.
ffmpeg_extract_audio(path + "/test.mp4", path + "/audio.wav")
rate, data = read(path + "/audio.wav")
print(data)
print(type(data))
#plt.imshow(data)
#plt.show()


def subclip_creator(clip, start, end):
    """Return the [start, end] subclip of *clip* (a moviepy VideoClip)."""
    return clip.subclip(start, end)
def preprocess(self, batch_size=16):
    """
    Outputs: Writes to disk the openl3 embedding pickle object for each
    sample. Optionally, it will output the entire matched X and Y numpy
    pickle objects if label path is provided.

    Pipeline: (optionally unzip input) -> extract each video's audio to
    /tmp as .wav -> batch openl3 embeddings -> pad/normalize -> pickle
    per-file embeddings (and optionally all-X / all-Y), then optionally
    zip the output folder.
    """
    normalizer = Normalizer()
    tmp_output_folder = ""
    if self.video_folder.endswith(".zip"):
        # Unzips files to a temp directory
        tmp_output_folder = self.output_folder.rstrip('/') + "_tmp"
        print(f"Unzipping files to temp dir {tmp_output_folder}...")
        Path(f"{tmp_output_folder}").mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(self.video_folder, 'r') as zip_ref:
            zip_ref.extractall(tmp_output_folder)
        print("Finished unzipping files")
    else:
        tmp_output_folder = self.video_folder
        print("Skipping unzipping files as input is a folder")
    Path(f"{self.output_folder}/audio-pickle/").mkdir(parents=True,
                                                      exist_ok=True)
    # Strip the audio from video and store as .wav file
    video_files = sorted(glob.glob(tmp_output_folder + '/*.mp4'))
    video_files_split = np.array_split(np.asarray(video_files),
                                       len(video_files) // batch_size)
    target_labels = []
    if self.label_path is not None:
        targets = []
        # Expected format: "<basename-without-ext> <1-based-label>" rows.
        target_labels = np.genfromtxt(self.label_path, delimiter=' ',
                                      dtype='str')
    sr = 0
    all_x = []
    maxlen = int(self.max_len // self.hop_size + 1)
    for i in range(0, len(video_files_split)):
        audio_reads = []
        for f in video_files_split[i]:
            newname = os.path.basename(f)
            output_wav_file = newname + 'extracted_audio.wav'
            ffmpeg_extract_audio(f, "/tmp/" + output_wav_file)
            if self.label_path is not None:
                # Match on basename without the ".mp4" extension;
                # labels are stored 1-based, targets are 0-based.
                target_index = np.where(
                    target_labels[:, 0] == newname[:-4])[0]
                target_index = int(target_index)
                target = int(target_labels[:, 1][target_index]) - 1
                targets.append(target)
            audio_read, sr = sf.read("/tmp/" + output_wav_file)
            audio_reads.append(audio_read)
            print(f"Reading file {output_wav_file} ...")
        X_arr, ts_list = openl3.get_audio_embedding(
            audio_reads, sr, batch_size=15, hop_size=self.hop_size)
        # Pad every sequence to a common length before stacking.
        X = tf.keras.preprocessing.sequence.pad_sequences(X_arr,
                                                          maxlen=maxlen)
        X = np.asarray(X, dtype='float32')
        if i == 0:
            all_x = X
            all_x = np.asarray(all_x, dtype='float32')
        else:
            all_x = np.concatenate((all_x, X), axis=0)
        print(all_x.shape)
    # Normalize each sample's embedding matrix in place.
    all_x_norm = all_x
    for i in range(0, len(all_x_norm)):
        all_x_norm[i] = normalizer.fit_transform(all_x_norm[i])
    # BUGFIX: use enumerate so each file gets ITS OWN embedding; the old
    # loop reused the stale `i` from the normalization loop above, so
    # every per-file pickle contained the last sample.
    for i, f in enumerate(video_files):
        file_name = os.path.basename(f)
        with open(
                f"{self.output_folder}/audio-pickle/{file_name}-openl3.pkl",
                "wb") as f_out:
            pickle.dump(all_x_norm[i], f_out)
    if self.label_path is not None:
        with open(f"{self.output_folder}/audio-pickle-all-X-openl3.pkl",
                  "wb") as f_out:
            pickle.dump(all_x_norm, f_out)
        targets = np.asarray(targets)
        with open(f"{self.output_folder}/audio-pickle-all-Y-openl3.pkl",
                  "wb") as f_out:
            pickle.dump(targets, f_out)
    if self.output_file is not None:
        print(f"Starting to zip files to {self.output_file}")

        def zipdir(path, ziph):
            # Recursively add `path` to the zip, keeping relative paths.
            for root, dirs, files in os.walk(path):
                folder = root[len(path):]
                for file in files:
                    ziph.write(join(root, file), join(folder, file))

        zipf = zipfile.ZipFile(self.output_file, 'w', zipfile.ZIP_DEFLATED)
        zipdir(self.output_folder, zipf)
        zipf.close()
        print(f"Done zipping files to {self.output_file}")
    print("Done!")
def get_audio_clip(self):
    """Extract the audio track of ``self.videosource`` into ``self.audiofile``.

    A stale audio file from a previous run is deleted first so the
    extraction always produces a fresh copy.
    """
    stale_copy_exists = path.exists(self.audiofile)
    if stale_copy_exists:
        remove(self.audiofile)
    mv.ffmpeg_extract_audio(self.videosource, self.audiofile)
# BUGFIX: ffmpeg_extract_subclip is called below but was never imported.
from moviepy.video.io.ffmpeg_tools import (ffmpeg_extract_audio,
                                           ffmpeg_extract_subclip)
from moviepy.video.io.VideoFileClip import VideoFileClip
import datetime

CLIP_DURATION = 10  # in minutes
VIDEO_NAME = "video_comp3.mp4"

VFC = VideoFileClip(VIDEO_NAME)
DURATION = VFC.duration
print("Duration: ", VFC.duration)

# Split the video into CLIP_DURATION-minute subclips plus matching audio.
i = 1
for start in range(0, int(DURATION), CLIP_DURATION * 60):
    timestamp = str(datetime.timedelta(seconds=int(start)))
    # targetname="clip-"+timestamp
    targetname = "clip-" + str(i)
    i += 1
    ffmpeg_extract_subclip(VIDEO_NAME, start, start + (CLIP_DURATION * 60),
                           targetname="video_clips/" + targetname + ".mp4")
    try:
        ffmpeg_extract_audio("video_clips/" + targetname + ".mp4",
                             str("audio_clips/" + targetname + ".wav"),
                             bitrate=500)
    except IOError:
        print("No audio source found in video")

# Save a still frame every 2 seconds.
i = 1
for t in range(0, int(DURATION), 2):
    print("Image: img-" + str(i) + ".jpg")
    # BUGFIX: save under the same name that was just printed; previously
    # i was incremented before save_frame, so names were off by one and
    # img-1 was never written.
    VFC.save_frame("img_frames/img-" + str(i) + ".jpg", t=t, withmask=False)
    i += 1
def get_audio(video_name, directory):
    """Extract the audio of ``directory/video_name`` into an .mp3 beside it.

    The output keeps the video's base name with the extension swapped
    for ``.mp3`` and is written into the same directory.
    """
    #clip = VideoFileClip(os.path.join(dir, video_name))
    source = os.path.join(directory, video_name)
    # BUGFIX: use splitext so multi-dot names ("my.video.mp4") keep their
    # interior dots; ''.join(name.split('.')[:-1]) collapsed them.
    base, _ = os.path.splitext(video_name)
    ffmpeg_extract_audio(source,
                         output=os.path.join(directory, base + '.mp3'))
def check_category(category):
    """Scrape every video in one site category and download it with
    subtitles and extracted audio.

    category: dict with 'category_name' and 'category_link' keys.
    Side effects: creates 'resault/<category_name>/' and writes per-video
    .txt (subtitles), .mp4 (video) and .wav (audio) files into it.
    Network access via the module-level DOMAIN_LINK base URL.
    NOTE(review): Python 2 code (print statements, py2 filter semantics).
    """
    print category['category_name']
    try:
        os.makedirs('resault/%s' % (category['category_name']))
    except:
        # NOTE(review): bare except also hides real OS errors, not just
        # "directory exists".
        print 'Directory already created!'
    prepare_link = DOMAIN_LINK + category['category_link']
    page_request = requests.get(prepare_link).content
    page_tree = etree.HTML(page_request)
    # Pagination links for the category; the last anchor ("next"?) is dropped.
    paginations = page_tree.xpath(
        './/div[@class="szukaj_pagination"]/p/a')[:-1]
    pagination_list = []
    for pagination in paginations:
        page_link = ''.join(pagination.xpath('.//@href'))
        pagination_list.append(page_link)
    if len(pagination_list) >= 2:
        # Derive the page-1 URL from the page-2 URL (the first anchor does
        # not carry a usable link itself).
        pagination_list[0] = pagination_list[1].replace('strona-2',
                                                        'strona-1')
    for video_list in pagination_list:
        video_page_link = DOMAIN_LINK + video_list
        video_page_request = requests.get(video_page_link).content
        video_page_tree = etree.HTML(video_page_request)
        video_detail_link_list = video_page_tree.xpath(
            './/div[@class="os_czasu_wyniki_bg"]//@href')
        for video_detail_link in video_detail_link_list:
            video_detail_link_prepare = DOMAIN_LINK + video_detail_link
            video_detail_link_request = requests.get(
                video_detail_link_prepare).content
            video_detail_link_tree = etree.HTML(video_detail_link_request)
            # file link creation: pull the 'file' URL out of the inline
            # player script (depends on the site's exact formatting).
            video_player_blok = video_detail_link_tree.xpath(
                './/div[@class="player"]/script/text()')
            try:
                video_player_link = filter(
                    lambda x: "'file': 'http://kronikarp.pl:83/" in x,
                    video_player_blok[0].split('\r\n\t\t\t'))
            except:
                # No player script on the page: skip this video.
                continue
            # Strip the "'file': '" prefix and the trailing "'," suffix.
            video_file_link = video_player_link[0].replace("'file': '",
                                                           '')[:-2]
            # title creation ('/' replaced so the title is a valid filename)
            video_title = ''.join(
                video_detail_link_tree.xpath(
                    './/div[@class="player_and_news"]/div/h2/text()')
                [1:]).strip().replace('/', '|')
            print video_title
            # subtitles: the onclick handler carries the subtitle URL as
            # its first quoted argument.
            try:
                subtitles_blok = video_detail_link_tree.xpath(
                    './/div[@class="opisfilmu"]/p/a/@onclick')[1]
                subtitle_link = subtitles_blok.split("'")[1]
                subtitle_check = True
            except Exception as e:
                print e
                subtitle_check = False
            # start download
            if subtitle_check:
                subtitle_file_name = 'resault/%s/%s.txt' % (
                    category['category_name'], video_title)
                subtitle_file_request = requests.get(DOMAIN_LINK +
                                                     subtitle_link).content
                subtitle_file_tree = etree.HTML(subtitle_file_request)
                subtitle_file_text = subtitle_file_tree.xpath(
                    './/div[@class="srodek"]//text()')
                # Strip the site's fixed whitespace padding around the text.
                subtitle_file_text_out = ''.join(subtitle_file_text).replace(
                    '\r\n\r\n\r\n\t\r\n\t\t\r\n\t\t\t\t\t\t', ''
                ).replace(
                    '\r\n\t\t\t\t\t\r\n\t\t\r\n\r\n\t\r\n\r\n\r\n\r\n\r\n\t',
                    '')
                out_text = open(subtitle_file_name, 'w')
                out_text.write(subtitle_file_text_out.encode('utf-8'))
                out_text.close()
            try:
                f_video_file_name = 'resault/%s/%s.mp4' % (
                    category['category_name'], video_title)
                f_video_file_request = requests.get(video_file_link).content
                # NOTE(review): mode 'a' appends — a re-run would double the
                # file; whole video body is held in memory.
                out_video = open(f_video_file_name, 'a')
                out_video.write(f_video_file_request)
                out_video.close()
            except Exception as e:
                print e
                continue
            try:
                # Extract a .wav soundtrack next to the downloaded .mp4.
                t = f_video_file_name.replace('.mp4', '.wav')
                ffmpeg_extract_audio(f_video_file_name, t, bitrate=3000,
                                     fps=44100)
            except Exception as e:
                print e
                continue
def ffwd_video(path_in, path_out, checkpoint_dir, device_t='/gpu:0', batch_size=4): video_clip = VideoFileClip(path_in, audio=False) # Create a temporary file to store the audio. fp = tempfile.NamedTemporaryFile(suffix='.aac') temp_audio_file_name = fp.name fp.close() # Create a temporary file to store the video. fp = tempfile.NamedTemporaryFile(suffix='.mp4') temp_video_file_name = fp.name fp.close() # Extract the audio. ffmpeg_tools.ffmpeg_extract_audio(path_in, temp_audio_file_name) video_writer = ffmpeg_writer.FFMPEG_VideoWriter( temp_video_file_name, video_clip.size, video_clip.fps, codec="libx264", preset="medium", audiofile=None, threads=None, ffmpeg_params=["-b:v", "2000k"]) g = tf.Graph() soft_config = tf.compat.v1.ConfigProto(allow_soft_placement=True) soft_config.gpu_options.allow_growth = True with g.as_default(), g.device(device_t), \ tf.compat.v1.Session(config=soft_config) as sess: batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3) img_placeholder = tf.compat.v1.placeholder(tf.float32, shape=batch_shape, name='img_placeholder') preds = src.transform.net(img_placeholder) saver = tf.compat.v1.train.Saver() if os.path.isdir(checkpoint_dir): ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise Exception("No checkpoint found...") else: saver.restore(sess, checkpoint_dir) X = np.zeros(batch_shape, dtype=np.float32) def style_and_write(count): for i in range(count, batch_size): X[i] = X[count - 1] # Use last frame to fill X _preds = sess.run(preds, feed_dict={img_placeholder: X}) for i in range(0, count): video_writer.write_frame( np.clip(_preds[i], 0, 255).astype(np.uint8)) frame_count = 0 # The frame count that written to X for frame in video_clip.iter_frames(): X[frame_count] = frame frame_count += 1 if frame_count == batch_size: style_and_write(frame_count) frame_count = 0 if frame_count != 0: style_and_write(frame_count) 
video_writer.close() # Merge audio and video ffmpeg_tools.ffmpeg_merge_video_audio(temp_video_file_name, temp_audio_file_name, path_out) # Delete temporary files os.remove(temp_video_file_name) os.remove(temp_audio_file_name)