def render(mix_id, *args):
    # Normalize start offsets so the earliest track begins at 0 ms.
    minimum = sys.maxsize
    for arg in args:
        if arg[0] < minimum:
            minimum = arg[0]
    for arg in args:
        arg[0] -= minimum
    # Order the tracks by start offset.
    prio_queue = queue.PriorityQueue()
    for arg in args:
        prio_queue.put(arg)
    base = prio_queue.get_nowait()
    base_track = AudioSegment.from_file(base[1], "m4a")
    gain = base[2]
    base_track = base_track.apply_gain(gain)
    while not prio_queue.empty():
        overlay = prio_queue.get_nowait()
        overlay_track = AudioSegment.from_file(overlay[1], "m4a")
        gain = overlay[2]
        if gain != 0:
            overlay_track = overlay_track.apply_gain(gain)
        base_track = base_track.overlay(overlay_track, position=overlay[0])
    base_track.export('mix.wav', format='wav')
    command = 'ffmpeg -b 66k -y -f wav -i ./mix.wav ./mix.aac'
    subprocess.call(command, shell=True)
    os.remove('mix.wav')

# render(mix_id, [0, "test1.m4a", 0], [5000, "test2.m4a", -10], [10000, "test3.m4a", 5])
def export_wav(self, filename):
    n = self.song.num_tracks
    self.song.export_song("temp/song.abc")
    sounds = ["--syn_a", "--syn_b", "--syn_s", "--syn_e"]
    for i in range(n):
        os.system(
            "python read_abc.py temp/song.abc " + str(i + 1)
            + " temp/out_" + str(i + 1) + ".wav " + random.choice(sounds)
        )
    os.remove("temp/song.abc")
    combined = AudioSegment.from_file("temp/out_1.wav")
    if n >= 2:
        for i in range(1, n):
            sound = AudioSegment.from_file("temp/out_" + str(i + 1) + ".wav")
            combined = combined.overlay(sound)
    combined.export(filename, format="wav")
    for i in range(n):
        os.remove("temp/out_" + str(i + 1) + ".wav")
def main():
    global background
    global prettyGirls
    global oyeahs
    global marsOyeah
    global girlsPretty
    createBackground(song)
    prettyGirls(song)
    # we just so pretty
    soPretty = song[19990:21250]
    soPretty.export('soPretty.wav', 'wav')
    soPretty = wave.open('soPretty.wav', 'r')
    soPrettySlow = wave.open('soPrettySlow.wav', 'w')
    soPrettySlow.setparams(soPretty.getparams())
    writeFrames = soPretty.readframes(soPretty.getnframes())
    # Halving the frame rate plays the same frames at half speed.
    soPrettySlow.setframerate(soPretty.getframerate() / 2)
    soPrettySlow.writeframes(writeFrames)
    soPrettySlow.close()
    soPrettySlow = AudioSegment.from_wav('soPrettySlow.wav')
    # combine last two
    silent5 = AudioSegment.silent(duration=22000)
    smallSilent = AudioSegment.silent(90)
    girlsPretty = prettyGirls.append(smallSilent).append(soPrettySlow).append(silent5)
    ohYeah(song)
    mars(song)
    drums(song)
    delete()
def main():
    converted_files = []
    if not os.path.exists(OUTPUT_DIR):
        try:
            os.makedirs(OUTPUT_DIR)
        except Exception as e:
            now = datetime.now().strftime('%Y.%m.%d %H:%M')
            logger.error(
                "{} Error creating the directory, message: {}".format(now, e)
            )
            sys.exit(1)
    while True:
        files = [f for f in os.listdir(DIRECTORY)
                 if os.path.isfile(os.path.join(DIRECTORY, f))]
        for f in files:
            if f.split('.')[1] == EXTENSION and f not in converted_files:
                new_name = f.split('.')[0] + '.mp3'
                now = datetime.now().strftime('%Y.%m.%d %H:%M')
                try:
                    AudioSegment.from_wav(os.path.join(DIRECTORY, f)).export(
                        os.path.join(OUTPUT_DIR, new_name), format="mp3")
                    converted_files.append(f)
                    logger.debug(
                        "{} Successfully converted file {}".format(now, f)
                    )
                except Exception as e:
                    logger.error(
                        "{} Error converting file {}, message: {}".format(now, f, e)
                    )
                    sys.exit(1)
def main(): print("Gettings raw number sound bites.") # get each sound as pydub audio segment and add to list for easy access for i in range(10): number_sounds.append(AudioSegment.from_ogg("sound_bites/%i.ogg" % i)) # load in the beast by the lines of the file lines = loadBigNumFileToList() print("Creating blank audio file in memory.") output = AudioSegment.silent(duration=500) # 'blank' slate to append to. job_server = pp.Server() print("Splitting labor, and starting") # Define jobs, cpu cores/2 in my case # give range and other params job1 = job_server.submit(processRangeForLines, (range(0,10), lines, number_sounds)) job2 = job_server.submit(processRangeForLines, (range(10,20), lines, number_sounds)) # execute and grab value job1_audio = job1() job2_audio = job2() print("Final concatenation.") output += job1_audio + job2_audio print("Done making, now exporting... it make take a while.") file_handle = output.export("output.ogg", format="ogg", bitrate="64k", tags={"artist": "Keely Hill", "comments":"Made proudly."}) print("\033[92m\033[1mComplete!\033[0m")
def start(self):
    # Server runs until killed
    while True:
        # If we have a request, play it
        if len(self.request_list) != 0:
            self.current_song = AudioSegment.from_mp3("../songs/" + self.request_list.popleft())
        # Otherwise, play a random song
        else:
            self.current_song = AudioSegment.from_mp3("../songs/" + random.choice(self.songlist))
        self.new_song()
        # Stream the entire song
        for chunk in self.current_song:
            # Simply skip the time for the client
            if not self.has_client:
                sleep(0.001)
            else:
                # Stream chunk to first client
                client, address = self.clients[0]
                try:
                    chunk = chunk.raw_data
                    chunk = chunk[:self.chunk_size].ljust(self.chunk_size)
                    chunk_length = str(len(chunk))
                    client.sendto(bytes("SC" + chunk_length + (4 - len(chunk_length)) * " ", "UTF-8"), address)
                    client.sendto(chunk, address)
                # Disconnects will be handled, just maybe not in time to avoid
                # this error a few times. We just ignore the error.
                except BrokenPipeError:
                    pass
def get_data(path):
    """
    Gets the data associated with an audio file, converting to wav when necessary.

    :param path: path to audio file
    :return: sample rate, audio data
    """
    if path.endswith(".wav"):
        bee_rate, bee_data = read(path)
    else:
        temp = tempfile.NamedTemporaryFile(suffix=".wav")
        temp.close()
        if path.endswith(".flac"):
            sound = AudioSegment.from_file(path, "flac")
            sound.export(temp.name, format="wav")
        elif path.endswith(".mp3"):
            sound = AudioSegment.from_file(path, "mp3")
            sound.export(temp.name, format="wav")
        bee_rate, bee_data = read(temp.name)
        os.remove(temp.name)
    data_type = np.iinfo(bee_data.dtype)
    dmin = data_type.min
    dmax = data_type.max
    bee_data = bee_data.astype(np.float64)
    bee_data = 2.0 * ((bee_data - dmin) / (dmax - dmin)) - 1.0
    bee_data = bee_data.astype(np.float32)
    return bee_rate, bee_data
def mixer(first_path, second_path, mix_path, tag=None, f_format='wav'):
    '''
    ffmpeg or avconv are required for MP3 format mixing.
    WAV format must be 8, 16, or 32 bit (24 bit is not supported by pydub).
    '''
    with open(first_path, 'rb') as f:
        first = AudioSegment.from_file(f, format=f_format)
    with open(second_path, 'rb') as f:
        second = AudioSegment.from_file(f, format=f_format)
    # Overlay the shorter segment onto the longer one so nothing is truncated.
    if len(first) > len(second):
        mix = first.overlay(second)
    else:
        mix = second.overlay(first)
    mix.export(mix_path, format=f_format)
    metadata = {
        'tag': tag,
        'first_file': {
            'path': first_path,
            'length': first.duration_seconds
        },
        'second_file': {
            'path': second_path,
            'length': second.duration_seconds
        },
        'mix': {
            'path': mix_path,
            'length': mix.duration_seconds
        }
    }
    return metadata
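# Usage sketch for mixer() above (my addition): the file names are
# hypothetical and assume two same-format 16-bit WAVs already exist on disk.
info = mixer('voice.wav', 'backing.wav', 'mixed.wav', tag='demo')
print(info['mix']['length'])  # duration of the overlaid result, in seconds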
def GetVoice(word):
    # https://tts.voicetech.yandex.net/generate?text=text&key=3f874a4e-723d-48cd-a791-7401169035a0&format=mp3&speaker=zahar&emotion=good
    req = ('https://tts.voicetech.yandex.net/generate?ie=UTF-8&text=' + word
           + '&key=' + API_KEY_VOICE + '&format=mp3&speaker=ermil&emotion=neutral')
    response = requests.get(req, stream=True)
    with open("yasound.mp3", "wb") as handle:
        for data in tqdm(response.iter_content()):
            handle.write(data)
    AudioSegment.from_file('yasound.mp3').export("yasound.ogg", format="ogg")
def setUp(self):
    global test1, test2, test3, testparty, testdcoffset
    if not test1:
        test1 = AudioSegment.from_mp3(os.path.join(data_dir, 'test1.mp3'))
        test2 = AudioSegment.from_mp3(os.path.join(data_dir, 'test2.mp3'))
        test3 = AudioSegment.from_mp3(os.path.join(data_dir, 'test3.mp3'))
        testdcoffset = AudioSegment.from_wav(
            os.path.join(data_dir, 'test-dc_offset.wav'))
        testparty = AudioSegment.from_mp3(
            os.path.join(data_dir, 'party.mp3'))
    self.seg1 = test1
    self.seg2 = test2
    self.seg3 = test3
    self.mp3_seg_party = testparty
    self.seg_dc_offset = testdcoffset
    self.ogg_file_path = os.path.join(data_dir, 'bach.ogg')
    self.mp4_file_path = os.path.join(data_dir, 'creative_common.mp4')
    self.mp3_file_path = os.path.join(data_dir, 'party.mp3')
    self.webm_file_path = os.path.join(data_dir, 'test5.webm')
    self.jpg_cover_path = os.path.join(data_dir, 'cover.jpg')
    self.png_cover_path = os.path.join(data_dir, 'cover.png')
def overdub(_files, _returnPath):
    s1, s2 = AudioSegment.from_wav(_files[0]), AudioSegment.from_wav(_files[1])
    _dubbed = s1.overlay(s2)
    _dubbed.export(_returnPath, format='wav')
    os.remove(_files[0])
    os.remove(_files[1])
    return True
def test_audio_segment_from_path_like_bytes(self):
    seg1 = AudioSegment.from_file(self.mp3_path_str)
    seg2 = AudioSegment.from_file(self.mp3_path_like_bytes)
    self.assertEqual(len(seg1), len(seg2))
    self.assertEqual(seg1._data, seg2._data)
    self.assertTrue(len(seg1) > 0)
def test_direct_instantiation_with_bytes(self):
    seg = AudioSegment(
        b'RIFF\x28\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x02\x00\x00}\x00\x00\x00\xf4\x01\x00\x04\x00\x10\x00data\x04\x00\x00\x00\x00\x00\x00\x00')
    self.assertEqual(seg.frame_count(), 1)
    self.assertEqual(seg.channels, 2)
    self.assertEqual(seg.sample_width, 2)
    self.assertEqual(seg.frame_rate, 32000)
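# For reference (my addition): the byte string above is a complete one-frame
# WAV file, and the asserted values can be read straight out of its fmt chunk
# with the standard library.
import struct

header = (b'RIFF\x28\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x02\x00'
          b'\x00}\x00\x00\x00\xf4\x01\x00\x04\x00\x10\x00data\x04\x00\x00\x00'
          b'\x00\x00\x00\x00')
# The fmt chunk payload sits at bytes 20..35: format, channels, sample rate,
# byte rate, block align, bits per sample.
fmt, channels, rate, byte_rate, block_align, bits = struct.unpack('<HHIIHH', header[20:36])
assert (fmt, channels, rate, bits) == (1, 2, 32000, 16)  # PCM, stereo, 32 kHz, 16-bit
# The 4-byte data chunk holds exactly one frame: 2 channels x 2 bytes each.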
def talklang(phrase, lang='FR'):
    try:
        language_dict = {"FR": 'fr-FR',
                         "US": 'en-US',
                         "GB": 'en-GB',
                         "DE": 'de-DE',
                         "ES": 'es-ES',
                         "IT": 'it-IT'}
        language = language_dict[lang]
        cachepath = os.path.dirname(os.path.dirname(__file__))
        file = 'tts'
        filename = os.path.join(cachepath, file + '.wav')
        filenamemp3 = os.path.join(cachepath, file + '.mp3')
        os.system('pico2wave -l ' + language + ' -w ' + filename + ' "' + phrase + '"')
        song = AudioSegment.from_wav(filename)
        songmodified = song
        songmodified.export(filenamemp3, format="mp3", bitrate="128k",
                            tags={'albumartist': 'Talkie', 'title': 'TTS', 'artist': 'Talkie'},
                            parameters=["-ar", "44100", "-vol", "200"])
        song = AudioSegment.from_mp3(filenamemp3)
        cmd = ['mplayer']
        cmd.append(filenamemp3)
        if GPIO.input(17) != 0:
            print('GPIO 17 in use')
            while GPIO.input(17) != 0:
                time.sleep(0.5)
        print('GPIO 17 free')
        GPIO.output(18, 1)
        print('GPIO 18 ON, synthesizing the message')
        with open(os.devnull, 'wb') as nul:
            subprocess.call(cmd, stdout=nul, stderr=subprocess.STDOUT)
        GPIO.output(18, 0)
        print('Synthesis finished, GPIO 18 OFF')
    except Exception as e:
        return str(e)
def convertFile(self, url, local, verbose=True):
    try:
        if verbose:
            print(' (downloading ' + local + ')')
        format = url[-3:]
        localMp3 = local[:-4] + '.mp3'
        if settings.localTemp[-1] != '/':
            settings.localTemp += '/'
        if settings.localPublish[-1] != '/':
            settings.localPublish += '/'
        localTempFile = settings.localTemp + local
        localMp3File = settings.localPublish + localMp3
        localMp3URL = settings.localPublishRelative + localMp3
        if format != 'mp3':
            if utils.downloadFile(url, localTempFile):
                if settings.FFMpegLocation != '':
                    AudioSegment.converter = settings.FFMpegLocation
                AudioSegment.from_file(localTempFile).export(localMp3File, format='mp3', bitrate='96k')
                # THEN add an updated media URL and media type
                # THEN add a cleanup routine to delete copies that are not in top x00
                # THEN add a routine to do this update for already-stored media
                os.remove(localTempFile)
    except:
        err = str(sys.exc_info()[0]) + ' -> ' + str(sys.exc_info()[1])
        log.log('ERROR', err)
        localMp3URL = ''
    return localMp3URL
def run(self):
    current_files = []
    while True:
        for file in os.listdir(self.scan_directory):
            if file.endswith('.wav') and file not in current_files:
                AudioSegment.from_wav(self.scan_directory + file).export(
                    self.mp3_directory + file[:-3] + 'mp3', format='mp3')
                current_files.append(file)
def createSoundFile(morse):
    dot = AudioSegment.from_wav(r"C:\Users\Gaurav Keswani\Documents\Eclipse\Morse-Code-Generator\src\resources\sound\dot.wav")
    dash = AudioSegment.from_wav(r"C:\Users\Gaurav Keswani\Documents\Eclipse\Morse-Code-Generator\src\resources\sound\dash.wav")
    # word_gap = AudioSegment.from_wav(r"C:\Users\Gaurav Keswani\Documents\Eclipse\Morse-Code-Generator\src\resources\sound\void.wav")
    sound_config = AudioSegment.empty()

    # Splitting the morse sentence into various word codes
    codes = morse.split(" ")
    for morseWord in codes:
        # Splitting each word code into individual codes
        for item in morseWord:
            # Adding dot sound for zero
            if item == "0":
                sound_config += dot
            # Adding dash sound for one
            elif item == "1":
                sound_config += dash
            # Adding a 300 ms wait between each alphabet
            else:
                sound_config += AudioSegment.silent(300)
                sound_config += dot[0.1:0.2]

    # Exporting the sound file as morse.wav
    sound_config.export(r"C:\Users\Gaurav Keswani\Documents\Eclipse\Morse-Code-Generator\src\resources\sound\morse.wav", format="wav")
def MangleLibrary(src, dst, audio, splice=None):
    if os.path.splitext(audio)[1] != ".mp3":
        raise ValueError("Prank audio is not an mp3")
    prank = AudioSegment.from_mp3(audio)
    # Walk src
    for root, dirs, files in os.walk(src):
        # Loop through files in this dir
        for fn in files:
            # If file is an mp3
            if os.path.splitext(fn)[1] == ".mp3":
                # Import song
                fullsong = AudioSegment.from_mp3(root + "/" + fn)
                # Pick a random splice point between 15 and 60 seconds into the song
                start = random.randint(15, 60)
                print("Spliced {} after {} seconds".format(root + "/" + fn, start))
                # Splice in prank song
                if splice is not None:
                    r = random.randint(0, len(splice) - 1)
                    final = fullsong[:start * 1000] + prank[splice[r][0]:splice[r][1]] + fullsong[start * 1000:]
                    # final = fullsong[:start*1000] + prank + fullsong[start*1000:]
                else:
                    final = fullsong[:start * 1000] + prank
                # Recreate directory structure in dst
                if not os.path.exists(dst + "/" + root):
                    os.makedirs(dst + "/" + root)
                # Export song with tags
                final.export(dst + "/" + root + "/" + fn, format="mp3",
                             tags=mediainfo(root + "/" + fn).get('TAG', {}))
def outputTrack(playList):
    au_file(name='master.au', freq=0, dur=playList[len(playList) - 1][0][1], vol=0.2)
    masterSong = AudioSegment.from_file("master.au", "au")
    for item in playList:
        # get the segment length
        longitudDelSegmento = int(item[0][1]) - int(item[0][0])
        # get whether it loops
        loops = item[2]
        # create the sounds for this section
        sonidoNum = 1  # counter for the sounds
        # create a temporary sound that will hold this whole section
        au_file(name="instrumento.au", freq=0, dur=longitudDelSegmento, vol=1)
        for itemSonido in item[1]:
            nombre = 'sound' + str(sonidoNum) + ".au"
            # print(nombre, itemSonido[2], itemSonido[1], float(itemSonido[0]))
            au_file(name=nombre, freq=int(itemSonido[2]), dur=int(itemSonido[1]), vol=float(itemSonido[0]))
            sonidoNum += 1
        instrumento = AudioSegment.from_file("instrumento.au", "au")
        for i in range(1, sonidoNum):
            nombre = 'sound' + str(i) + ".au"
            # open the file
            temp = AudioSegment.from_file(nombre, "au")
            # overlay it onto the instrument
            instrumento = instrumento.overlay(temp, position=0, loop=loops)
        # trim the instrument to the segment length
        instrumento = instrumento[:longitudDelSegmento]
        # overlay the section onto the master
        masterSong = masterSong.overlay(instrumento, position=int(item[0][0]))
    # final = masterSong * 2
    masterSong.export("testingSong.emepetres", format="mp3")
def main():
    wav_pat = re.compile(r'\.wav$')
    # print('End wav path:', end_path)
    # sound_end = AudioSegment.from_wav(end_path).set_frame_rate(16000)
    for pair in wav_folders:
        folder_name = pair[0]
        input_folder = input_folder_prefix + '/' + folder_name + '/' + input_folder_suffix
        output_folder = output_folder_prefix + '/' + folder_name + '/' + output_folder_suffix
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        # find all files with wav suffix
        files = list(filter(lambda x: wav_pat.search(x), os.listdir(input_folder)))
        num_file = len(files)
        last_wav_pat = re.compile(str(num_file) + r'\.wav$')
        for filename in files:
            # run_single(input_folder + '/' + filename, output_folder + '/' + filename)
            print('------')
            print('Processing %s...' % (input_folder + '/' + filename))
            sound_input = AudioSegment.from_wav(input_folder + '/' + filename)
            if last_wav_pat.search(filename):
                end_filename = random_pick(end_wavs2)
            else:
                end_filename = random_pick(end_wavs1)
            print('End tone filename: %s' % end_filename)
            sound_end = AudioSegment.from_wav(end_filename).set_frame_rate(16000)
            sound_combined = sound_input + sound_end
            sound_combined.export(output_folder + '/' + filename, format="wav")
def setUp(self):
    global test1, test2, test3
    if not test1:
        test1 = AudioSegment.from_mp3(os.path.join(data_dir, 'test1.mp3'))
        test2 = AudioSegment.from_mp3(os.path.join(data_dir, 'test2.mp3'))
        test3 = AudioSegment.from_mp3(os.path.join(data_dir, 'test3.mp3'))
    self.seg1, self.seg2, self.seg3 = test1, test2, test3
def morsesound(sentence, freq=1000, length=100, path='output\\'):
    """Turns a sentence into a morse soundfile"""
    mor = morse(sentence)
    from pydub.generators import Sine
    from pydub import AudioSegment
    import re
    dot = Sine(freq).to_audio_segment(length)
    dash = Sine(freq).to_audio_segment(length * 3)
    sil1 = AudioSegment.silent(length)
    sil3 = AudioSegment.silent(length * 3)
    result = AudioSegment.silent(length)
    for a in mor:
        if a == ".":
            result += dot
        elif a == "-":
            result += dash
        elif a == "/":
            result += sil1
        else:
            result += sil3
        result += sil1
    filename = path + re.sub(r'[/\?!:*|",.]', '', sentence) + '.mp3'
    result.export(filename, format="mp3")
    return filename
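# Usage sketch (my addition): assumes the morse() encoder used above is in
# scope and that the 'output\\' directory already exists.
path = morsesound("hello world", freq=800, length=80)
print("wrote", path)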
def generateFile(self):
    wav = default_storage.open('songs/' + str(self.pk) + '.wav', 'wb')
    final = None
    pitches = map(int, self.pitches.split(','))
    durations = map(int, self.durations.split(','))
    for pitch, duration in zip(pitches, durations):
        fn = 'pitches/' + pitchTable[pitch] + '.wav'
        pf = default_storage.open(fn)
        if final is None:
            final = AudioSegment(pf)[0:durationTable[duration]]
        else:
            final += AudioSegment(pf)[0:durationTable[duration]]
    # Copied from AudioSegment source...
    # I should have changed AudioSegment (getWaveFileContents() or something)
    # and submitted a pull request, but I have a deadline.
    # Possibly optimize to just have a string packed with data, then use
    # ContentFile instead of File below.
    wave_data = wave.open(wav, 'wb')
    wave_data.setnchannels(final.channels)
    wave_data.setsampwidth(final.sample_width)
    wave_data.setframerate(final.frame_rate)
    wave_data.setnframes(int(final.frame_count()))
    wave_data.writeframesraw(final._data)
    wave_data.close()
    wav.close()  # ?
    wav_rb = default_storage.open('songs/' + str(self.pk) + '.wav', 'rb')
    self.wav.save('songs/' + str(self.pk) + '.wav', File(wav_rb))
    wav_rb.close()
def responseToAnAudioCachonism(self, bot, update):
    message = update["message"]
    user = message.from_user["username"]
    if user == "":
        user = message.from_user["first_name"]
    responseText = "Hey %s. I'm TeofiBot. Look what I do with your voice note..." % user
    if user not in ["TeoGol29"]:
        downAudio = bot.getFile(message.voice.file_id)
        urllib.urlretrieve(downAudio.file_path, downAudio.file_id)
        sound1 = AudioSegment.from_file(downAudio.file_id)
        source_path = self.get_resource_path("sounds", "mi_creador.mp3")
        sound2 = AudioSegment.from_file(source_path)
        sound1 = sound1 + 1
        sound2 = sound2 - 8
        combined = sound1.overlay(sound2)
        audio_mix_filename = "mix_" + downAudio.file_id
        combined.export(audio_mix_filename, format='mp3')
        bot.sendMessage(chat_id=update.message.chat_id, text=responseText)
        bot.sendAudio(chat_id=update.message.chat_id,
                      audio=open(audio_mix_filename, 'rb'),
                      caption='TeofiBot sabotaging you with flavor')
        os.remove(downAudio.file_id)
        os.remove(audio_mix_filename)
def match_length(input_path, output_path, match_path, force=False):
    """
    Speeds up or slows down a wav file so that the length matches the length
    of another wav file.

    :param input_path: the input wav path
    :param output_path: the output wav path
    :param match_path: the path of the wav to match the length of
    :param force: call recursively if the input_path and match_path lengths
        vary greatly (not between 0.5 and 2.0)
    :returns: -1 if a file does not exist or ffmpeg fails
    """
    if check_file_paths([input_path, match_path]) == -1:
        return -1
    input_segment = AudioSegment.from_file(input_path)
    input_length = input_segment.duration_seconds
    match_segment = AudioSegment.from_file(match_path)
    match_seg_length = match_segment.duration_seconds
    length_coefficient = input_length / match_seg_length
    if 0.5 < length_coefficient < 2.0:
        change_length(input_path, output_path, length_coefficient)
        return 0
    if force:
        # Stretch in factor-of-two steps until the ratio falls in range.
        if length_coefficient > 2.0:
            change_length(input_path, input_path, 2.0)
            match_length(input_path, output_path, match_path, force=True)
        else:
            change_length(input_path, input_path, 0.5)
            match_length(input_path, output_path, match_path, force=True)
    else:
        print('wrong size')
        return -1
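# Usage note (my addition): check_file_paths and change_length are external
# helpers referenced above; their implementations are not shown here.
# Stretch a.wav so it lasts as long as b.wav; force=True handles ratios
# outside (0.5, 2.0) by stretching in factor-of-two steps first.
match_length('a.wav', 'a_matched.wav', 'b.wav', force=True)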
def process_sounds(sounds, file_format, bucket, s3_extension, sample_duration, fade_duration, sample_start):
    preview = AudioSegment.empty()
    sample_filenames = []
    for count, sound in enumerate(sounds, 1):
        print('\nDownloading and sampling {} of {}, {:.0f}% complete'.format(
            count, len(sounds), (count / len(sounds)) * 100))
        print(sound['name'], sound['url'])
        key = bucket.get_key(sound['id'] + s3_extension if s3_extension else sound['id'])
        source_filename = tempfile.NamedTemporaryFile(prefix='/tmp/', suffix='.{}'.format(file_format)).name
        sample_filename = tempfile.NamedTemporaryFile(prefix='/tmp/', suffix='.{}'.format(file_format)).name
        get_sample_from_key.delay(source_filename, sample_filename, key,
                                  file_format, sound, sample_start, sample_duration)
        sample_filenames.append(sample_filename)
    wait(get_sample_from_key)
    for count, sample_filename in enumerate(sample_filenames, 1):
        print('\nProcessing {} of {}, {:.0f}% complete'.format(
            count, len(sounds), (count / len(sounds)) * 100))
        print(sample_filename)
        sample = AudioSegment.from_file(sample_filename, format=file_format)
        # Append sample with cross fade
        preview = preview.append(sample, crossfade=fade_duration * config.one_second) if preview else sample
    return preview
def audiodata_getter(path, date, filedate, filename, index):
    # Check to see if it's a wav file. If not, convert in a temp file.
    splitname = os.path.splitext(filename)[0]
    if os.path.splitext(filename)[1] != ".wav":
        temp = tempfile.NamedTemporaryFile(suffix=".wav")
        if os.path.splitext(filename)[1] == ".mp3":
            if "mp3" in path and date is None:
                sound = AudioSegment.from_file(path + filedate[index] + "/" + filename, "mp3")
            else:
                sound = AudioSegment.from_file(path + filename, "mp3")
            sound.export(temp.name, format="wav")
        if os.path.splitext(filename)[1] == ".flac":
            if "mp3" in path and date is None:
                sound = AudioSegment.from_file(path + filedate[index] + "/" + filename, "flac")
            else:
                sound = AudioSegment.from_file(path + filename, "flac")
            # Export as wav so wave.open() below can read it.
            sound.export(temp.name, format="wav")
        try:
            wav = wave.open(temp, 'r')
            return wav
        except:
            print(filename + " corrupted or not audio file.")
    else:
        try:
            # Open the .wav file and get the vital information
            wav = wave.open(path + "/audio/" + filename, 'r')
            return wav
        except:
            print(filename + " corrupted or not audio file.")
def interpret(file, bpm, metronome, dondokos):
    dirstack = file.split(os.sep)[:-1]
    song = []
    now = 0

    def play_voice(name, bpm, metronome, dondokos):
        global PADDING
        drum_file = os.sep.join(dirstack + [name]) + '.drum'
        pattern = read_pattern(open(drum_file))
        voice = play(pattern, bpm, metronome, dondokos)
        beats = (((len(pattern) + PADDING - 1) / PADDING) * PADDING) / 4
        beats = len(pattern) / 4
        return voice, beats

    for line in read_song(open(file)):
        if ':' in line:
            key, value = line.split(':')
            if key == 'bpm':
                bpm += int(value)
            elif key == 'metronome':
                try:
                    metronome = int(value)
                except:
                    metronome = None
            elif key == 'dondokos':
                try:
                    dondokos = int(value)
                except:
                    dondokos = None
            else:
                print('bad key/value pair:', [key, value])
            continue
        if '|' in line:
            # Play several voices in parallel, mixed into one segment.
            voices = []
            beats = 0
            for name in [n.strip() for n in line.split('|')]:
                print(name, end='', flush=True)
                v, b = play_voice(name, bpm, metronome, dondokos)
                voices.append(v)
                beats = max(beats, b)
            voice = AudioSegment.silent(duration=(60000.0 / bpm) * beats + 2000)
            for v in voices:
                voice = voice.overlay(v)
        else:
            voice, beats = play_voice(line, bpm, metronome, dondokos)
        song.append((now, voice))
        time = beats * (60000.0 / bpm)
        now += time
    master_mix = AudioSegment.silent(duration=now + 2000)
    for when, voice in song:
        master_mix = master_mix.overlay(voice, position=when)
    return master_mix
def split(audio_file):
    filename, file_extension = os.path.splitext(audio_file)
    if file_extension == ".mp3":
        song = AudioSegment.from_mp3(audio_file)
        split_song(filename, song)
    elif file_extension == ".wav":
        song = AudioSegment.from_wav(audio_file)
        split_song(filename, song)
def test_exporting_to_ogg_uses_default_codec_when_codec_param_is_none(self):
    with NamedTemporaryFile('w+b', suffix='.ogg') as tmp_ogg_file:
        AudioSegment.from_file(self.mp4_file_path).export(tmp_ogg_file, format="ogg")
        info = mediainfo(filepath=tmp_ogg_file.name)
    self.assertEqual(info["codec_name"], "vorbis")
    self.assertEqual(info["format_name"], "ogg")
def get_response_obj(self, req_obj):
    def cleanup_temp_files():
        for file_path in self.__temp_files:
            if os.path.isfile(file_path):
                os.remove(file_path)

    # Load file from url
    src_file_urls_list = req_obj['FileUrlsList']
    src_file_paths_list = []
    sox_target_audio_params = [
        '-r', str(self.__DENOISER_SAMPLE_RATE), '-b', '32', '-e', 'float'
    ]
    for src_file_url in src_file_urls_list:
        long_file_name = self.__get_file_name_from_url(src_file_url)
        question_mark_index = long_file_name.find('?')
        if question_mark_index > -1:
            long_file_name = long_file_name[0:question_mark_index]
        long_file_path = os.path.join(tempfile.gettempdir(), long_file_name)
        long_file_path = os.path.join('/denoising/audio/', long_file_name)
        if not os.path.exists(long_file_path):
            self.__logger.info(f'TRY: Save initial file to {long_file_path}')
            self.__upload_and_save_file(src_file_url, long_file_path)
            self.__logger.info(f'SUCCESS: Initial file saved to {long_file_path}')
        self.__temp_files.append(long_file_path)

        # Convert initial file to wav with target sample rate
        long_file_wav_path = long_file_path.replace('.mp3', '.wav')
        self.__logger.info(
            f'TRY: Convert initial file to target audio params to file: {long_file_wav_path}')
        process = subprocess.Popen(
            ["sox", long_file_path, *sox_target_audio_params, long_file_wav_path],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
        self.__logger.info(f'SUCCESS: File converted to "{long_file_wav_path}"')
        self.__temp_files.append(long_file_path)
        self.__temp_files.append(long_file_wav_path)
        src_file_paths_list.append(long_file_wav_path)
        self.__logger.info(f'Src wav file path is {long_file_wav_path}')
        assert os.path.isfile(long_file_wav_path)

    denoised_file_urls_list = []
    for src_file_path in src_file_paths_list:
        denoised_full_file_path = src_file_path.replace('.wav', '_denoised.wav')
        self.__logger.info(
            f'Target denoised full file path is {denoised_full_file_path}')
        src_wav_file_obj = AudioSegment.from_wav(src_file_path)

        # Split initial file to parts
        cur_src_file_part_start = 0
        cur_src_file_part_end = self.__MAX_AUDIO_DURATION_SECONDS
        if cur_src_file_part_end > src_wav_file_obj.duration_seconds:
            cur_src_file_part_end = src_wav_file_obj.duration_seconds
        denoised_file_parts_paths_list = []
        it = 1
        while cur_src_file_part_start < src_wav_file_obj.duration_seconds:
            src_file_part_path = src_file_path.replace('.wav', f'_part_{it}.wav')
            denoised_file_part_path = denoised_full_file_path.replace('.wav', f'_part_{it}.wav')
            self.__logger.debug(f'Denoised part file path is {denoised_file_part_path}')
            self.__logger.debug(
                f'Src part stamps is {cur_src_file_part_start}s - {cur_src_file_part_end}s')
            self.__logger.debug(f'Src part file path is {src_file_part_path}')
            process = subprocess.Popen(
                ["sox", src_file_path, src_file_part_path, 'trim',
                 self.__seconds_to_span(cur_src_file_part_start),
                 self.__seconds_to_span(cur_src_file_part_end)],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            process.communicate()
            self.__logger.info(
                f'Call denoising of file part "{src_file_part_path}" to: "{denoised_file_part_path}"')
            self.call_denoiser(src_file_part_path, denoised_file_part_path)
            denoised_file_parts_paths_list.append(denoised_file_part_path)
            # Move parts splitting window forward
            it += 1
            cur_src_file_part_start = cur_src_file_part_end
            cur_src_file_part_end += self.__MAX_AUDIO_DURATION_SECONDS
            if cur_src_file_part_end > src_wav_file_obj.duration_seconds:
                cur_src_file_part_end = src_wav_file_obj.duration_seconds

        denoised_mp3_file_path = denoised_full_file_path.replace('.wav', '.mp3')
        self.__logger.info('Concat part files to one denoised file')
        process = subprocess.Popen(
            ["sox", *denoised_file_parts_paths_list, denoised_mp3_file_path],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
        self.__logger.debug(f'Resulting file exported to {denoised_mp3_file_path}')
        assert os.path.isfile(denoised_mp3_file_path)
        cloud_save_file_name = denoised_mp3_file_path.replace('\\', '/').split('/')[-1]
        self.__logger.info(f'TRY: Save file to cloud as: "{cloud_save_file_name}"')
        saved_file_url = self.__cloud_storage.save_object_to_storage(
            denoised_mp3_file_path, cloud_save_file_name)
        self.__logger.info(f'SUCCESS: Saved denoised file URL is: "{saved_file_url}"')
        denoised_file_urls_list.append(saved_file_url)
        self.__temp_files.append(denoised_mp3_file_path)

    res = {'FileUrlsList': denoised_file_urls_list}
    cleanup_temp_files()
    return res
def download():
    # Steps to run once a file has been uploaded
    if request.method == 'POST':
        f = request.files['file']
        if not f:
            return render_template('upload.html')
        f_path = os.path.splitext(str(f))
        f_path = os.path.split(f_path[0])
        folder_path = 'c:/nmb/nmb_data/web/chunk/'
        normalizedsound = normalized_sound(f)
        audio_chunks = split_slience(normalizedsound)
        save_script = ''
        female_list = list()
        male_list = list()
        for i, chunk in enumerate(audio_chunks):
            speaker_stt = list()
            out_file = "chunk.wav"
            chunk.export(out_file, format='wav')
            aaa = sr.AudioFile(out_file)
            try:
                f = open('c:/nmb/nada/web/static/test.txt', 'wt', encoding='utf-8')
                ff = open('c:/nmb/nada/web/static/test_female.txt', 'wt', encoding='utf-8')
                fm = open('c:/nmb/nada/web/static/test_male.txt', 'wt', encoding='utf-8')
                stt_text = STT(aaa)
                speaker_stt.append(str(stt_text))
                y, sample_rate = librosa.load(out_file, sr=22050)
                if len(y) >= 22050 * 5:
                    y = y[:22050 * 5]
                    speaker = predict_speaker(y, sample_rate)
                    speaker_stt.append(str(speaker))
                    print(speaker_stt[1], " : ", speaker_stt[0])
                    if speaker == '여자':
                        female_list.append(str(speaker_stt[0]))
                    else:
                        male_list.append(str(speaker_stt[0]))
                else:
                    # Chunks shorter than 5 seconds are tiled onto themselves
                    # until long enough for the speaker model.
                    audio_copy = AudioSegment.from_wav(out_file)
                    audio_copy = copy.deepcopy(audio_copy)
                    for num in range(3):
                        audio_copy = audio_copy.append(copy.deepcopy(audio_copy), crossfade=0)
                    out_file_over5s = "chunk_over_5s.wav"
                    audio_copy.export(out_file_over5s, format='wav')
                    y_copy, sample_rate = librosa.load(out_file_over5s, sr=22050)
                    y_copy = y_copy[:22050 * 5]
                    speaker = predict_speaker(y_copy, sample_rate)
                    speaker_stt.append(str(speaker))
                    print(speaker_stt[1] + " : " + speaker_stt[0])
                    if speaker == '여자':
                        female_list.append(str(speaker_stt[0]))
                    else:
                        male_list.append(str(speaker_stt[0]))
                save_script += speaker_stt[1] + " : " + speaker_stt[0] + '\n\n'
                f.writelines(save_script)
                ff.writelines('\n\n'.join(female_list))
                fm.writelines('\n\n'.join(male_list))
                # Delete the temporary chunk wav files
                if os.path.isfile(out_file):
                    os.remove(out_file)
                if os.path.isfile(out_file_over5s):
                    os.remove(out_file_over5s)
            except:
                pass
        f.close()
        ff.close()
        fm.close()
        return render_template('/download.html')
segment_text = []
text = " ".join(trsc[key])
for split in text_splits:
    w1, w2 = split
    w = w1 + " " + w2
    ids = text.find(w)
    assert (ids != -1)
    segment_text.append(text[:ids + len(w1)])
    text = text[ids + len(w1):]
# handle last segment
segment_text.append(text)
# print(segment_text)

# perform sequential cutting
with open(path + key + ".wav", 'rb') as in_f:
    current_audio = AudioSegment.from_wav(in_f)
for n, split in enumerate(cutter):
    part1 = current_audio[:split]
    with open(out_path + key + "_cut_" + str(n) + ".wav", 'wb') as out_sound:
        handle = part1.export(out_sound, format="wav")
        handle.close()
    out_txt.write(key + "_cut_" + str(n) + ".wav" + "|" + segment_text[n] + "\n")
    current_audio = current_audio[split:]
# handle last cut: export whatever audio remains after the final split
out_txt.write(key + "_cut_" + str(n + 1) + ".wav" + "|" + segment_text[n + 1] + "\n")
with open(out_path + key + "_cut_" + str(n + 1) + ".wav", 'wb') as out_sound:
    handle = current_audio.export(out_sound, format="wav")
    handle.close()
        else:
            num_angles = max_num_angles
            beat_counter += 1
            asc = True
        theta += 0.1
        pygame.display.flip()


if __name__ == "__main__":
    audio_dir = os.getcwd() + "/music/"
    recipe_dir = os.getcwd() + "/recipes/"
    drum_file = "drum_file.csd"
    output_file = "out.wav"
    song_length = 60
    songs = intro_dialogue(audio_dir)
    print()
    recipe_output = recipe_dialogue(recipe_dir)
    recipe_file = recipe_output if not recipe_output else recipe_dir + recipe_output
    first_song_file = audio_dir + songs[0]
    second_song_file = audio_dir + songs[1]
    track_length = AudioSegment.from_mp3(first_song_file).duration_seconds
    bpm = get_bpm(first_song_file)
    compile_drum_file(bpm, track_length, drum_file, output_file)
    mixed_song = mix_songs(first_song_file, second_song_file, output_file, song_length, recipe_file)
    os.remove(output_file)
    beats = get_beats(mixed_song)
    thread = Process(target=play, args=(mixed_song,))
    thread.start()
    run_animation(beats, thread)
def decide_action(transcript, final_string):
    print("0")
    # set up the mixer
    freq = 44100     # audio CD quality
    bitsize = -16    # unsigned 16 bit
    channels = 2     # 1 is mono, 2 is stereo
    buffer = 2048    # number of samples (experiment to get right sound)
    pygame.mixer.init(freq, bitsize, channels, buffer)
    score = sentiment_analysis(final_string)
    print('string=' + final_string)
    print("1")
    print(score)
    music_file_sent = "music/narration/message_sent.wav"
    myAudio_sent = pygame.mixer.Sound(music_file_sent)
    myAudio_sent.set_volume(0.9)
    pygame.mixer.Channel(3).play(myAudio_sent)
    time.sleep(2)
    initAudio = AudioSegment.from_mp3("output1.wav")
    louderAudio = initAudio + 25
    # play(louder_song)
    louderAudio.export("output2.wav", format="wav")
    for i in range(3):
        # play_audio()
        moon = "music/narration/moon_cropped.wav"
        tina = "music/narration/tina.wav"
        moon_audio = pygame.mixer.Sound(moon)
        tina_audio = pygame.mixer.Sound(tina)
        if score >= 0.1:
            print("positive")
            music_file1 = "music/positive/1.wav"
            music_file2 = "output2.wav"
            # Create sound object for each audio
            myAudio1 = pygame.mixer.Sound(music_file1)
            time.sleep(1)
            myAudio2 = pygame.mixer.Sound(music_file2)
            # Add audio to first channel
            myAudio1.set_volume(1.0)
            myAudio2.set_volume(1.0)
            print("Playing audio : ", music_file1)
            pygame.mixer.Channel(1).play(myAudio1)
            time.sleep(4)
            pygame.mixer.Channel(0).play(myAudio2)
            lights.fillEach((0, 40, 0), 0.05)
        elif score >= -0.1 and score <= 0.1:
            print("neutral")
            music_file1 = "music/neutral/1.wav"
            music_file2 = "output2.wav"
            # Create sound object for each audio
            myAudio1 = pygame.mixer.Sound(music_file1)
            time.sleep(1)
            myAudio2 = pygame.mixer.Sound(music_file2)
            # Add audio to first channel
            myAudio1.set_volume(1.0)
            myAudio2.set_volume(1.0)
            print("Playing audio: ", music_file1)
            pygame.mixer.Channel(1).play(myAudio1)
            time.sleep(4)
            pygame.mixer.Channel(0).play(myAudio2)
            lights.fillEach((40, 0, 40), 0.05)
        elif score <= -0.1:
            print("negative")
            music_file1 = "music/negative/2.wav"
            music_file2 = "output2.wav"
            # Create sound object for each audio
            myAudio1 = pygame.mixer.Sound(music_file1)
            myAudio2 = pygame.mixer.Sound(music_file2)
            time.sleep(1)
            # Add audio to first channel
            myAudio1.set_volume(1.0)
            myAudio2.set_volume(1.0)
            print("Playing audio: ", music_file1)
            pygame.mixer.Channel(1).play(myAudio1)
            time.sleep(4)
            pygame.mixer.Channel(0).play(myAudio2)
            lights.fillEach((40, 0, 0), 0.05)
        time.sleep(12)
        moon_audio.set_volume(0.6)
        tina_audio.set_volume(0.6)
        pygame.mixer.Channel(2).play(moon_audio)
        time.sleep(1)
        pygame.mixer.Channel(3).play(tina_audio)
        time.sleep(15)
def play(self):
    to_play = AudioSegment.from_file(self._filepath)
    play(to_play)
def contact(folder):
    file_path = './input/{0}/{0}'.format(folder)
    # Process the excel file
    loc = (file_path + ".xls")
    wb = xlrd.open_workbook(loc)
    sheet = wb.sheet_by_index(0)
    # Check whether any sentence is duplicated
    sentence_exist = {}
    for i in range(1, sheet.nrows):
        sentence = sheet.cell_value(i, 3)
        if sentence_exist.get(sentence):
            print("{0} is duplicated".format(sentence))
        else:
            sentence_exist.setdefault(sentence, True)
    # Process the audio
    woman_sound = AudioSegment.from_file(file_path + '_女.mp3', format="mp3")
    woman_chunks = split_on_silence(woman_sound, min_silence_len=1000, silence_thresh=-55)
    # exportChunks(woman_chunks)
    woman_slow_sound = AudioSegment.from_file(file_path + '_女慢.mp3', format="mp3")
    woman_slow_chunks = split_on_silence(woman_slow_sound, min_silence_len=1500, silence_thresh=-55)
    # exportChunks(woman_slow_chunks)
    man_sound = AudioSegment.from_file(file_path + '_男.mp3', format="mp3")
    man_chunks = split_on_silence(man_sound, min_silence_len=500, silence_thresh=-55)
    # exportChunks(man_chunks)
    print("The excel file contains {0} sentences".format(sheet.nrows - 1))
    print("Female audio split into {0} chunks".format(len(woman_chunks)))
    print("Female slow audio split into {0} chunks".format(len(woman_slow_chunks)))
    print("Male audio split into {0} chunks".format(len(man_chunks)))
    # Start writing output
    count = 0
    for i in range(1, sheet.nrows):
        sentence = sheet.cell_value(sheet.nrows - i, 3)
        sentence = processSentence(sentence)
        path = "./output/{0}/{1}.mp3".format(folder, sentence)
        if os.path.exists(path):
            print("{0} already exists".format(sentence))
            continue
        chinese_chunk = increaseDB(woman_chunks[-i * 2])
        man_chunk = increaseDB(man_chunks[-i])
        woman_slow_chunk = increaseDB(woman_slow_chunks[-i])
        contacted_chunk = (silence_sound * 2 + chinese_chunk + silence_sound * 3
                           + man_chunk + silence_sound * 3 + woman_slow_chunk
                           + silence_sound * 2)
        contacted_chunk.export(path, format="mp3")
        count = count + 1
    print("Generated {0} word audio files this run".format(count))
def padding(wav, white_noise_duration):
    # print("WAV FILE: " + wav)
    for x in white_noise_duration:
        if x == 0:
            wav_files = []
            padded_fname = wav.rsplit(".", 1)[0]
            # print("PADDED NAME: " + padded_fname)
            silence_duration = max(white_noise_duration)
            # print(padded_fname + "_whitenoise.wav")
            # convert sampling rate, bits per sample, audio channel
            subprocess.call([
                "ffmpeg", "-i", wav, "-ar", "44100", "-ac", "2",
                padded_fname + "_converted.wav", "-y",
            ])
            # white noise duration should be a list, e.g. [0, 1]
            # generate white noise wav file
            wn = WhiteNoise().to_audio_segment(duration=silence_duration * 1000)
            wn.export(padded_fname + "_whitenoise.wav", format="wav",
                      parameters=["-ar", "16000"])
            # stitch white noise wav file to specific audio wav file
            # before
            new_wav_before = (AudioSegment.from_wav(padded_fname + "_whitenoise.wav")
                              + AudioSegment.from_wav(padded_fname + "_converted.wav"))
            new_wav_before.export(
                padded_fname + "_padded" + "_" + str(white_noise_duration[1])
                + "_" + str(white_noise_duration[0]) + ".wav",
                format="wav", parameters=["-ar", "16000"])
            # after
            new_wav_after = (AudioSegment.from_wav(padded_fname + "_converted.wav")
                             + AudioSegment.from_wav(padded_fname + "_whitenoise.wav"))
            new_wav_after.export(
                padded_fname + "_padded" + "_" + str(white_noise_duration[0])
                + "_" + str(white_noise_duration[1]) + ".wav",
                format="wav", parameters=["-ar", "16000"])
            # remove white noise wav file
            os.remove(padded_fname + "_whitenoise.wav")
            os.remove(padded_fname + "_converted.wav")
            wav_files.append(padded_fname + "_padded" + "_" + str(white_noise_duration[1])
                             + "_" + str(white_noise_duration[0]) + ".wav")
            wav_files.append(padded_fname + "_padded" + "_" + str(white_noise_duration[0])
                             + "_" + str(white_noise_duration[1]) + ".wav")
            break
        else:
            wav_files = []
            padded_fname = (wav.rsplit(".", 1)[0]).split("/")[-1]
            # print("PADDED FILENAME: " + padded_fname)
            path = (wav.rsplit(".", 1)[0]).rsplit("/", 1)[0]
            # print("PATH: " + path)
            fn = (wav.rsplit(".", 1)[0]).rsplit("/", 1)[1]
            # print("FILENAME: " + fn)
            # white noise duration should be a list, e.g. [0, 1]
            # generate white noise wav files
            # wn_0 = AudioSegment.silent(duration=white_noise_duration[0] * 1000)
            wn_0 = WhiteNoise().to_audio_segment(duration=white_noise_duration[0] * 1000)
            wn_0.export(wav + "_whitenoise_0.wav", format="wav", parameters=["-ar", "16000"])
            # wn_1 = AudioSegment.silent(duration=white_noise_duration[1] * 1000)
            wn_1 = WhiteNoise().to_audio_segment(duration=white_noise_duration[1] * 1000)
            wn_1.export(wav + "_whitenoise_1.wav", format="wav", parameters=["-ar", "16000"])
            # stitch white noise wav file to specific audio wav file
            new_wav = (AudioSegment.from_wav(wav + "_whitenoise_0.wav")
                       + AudioSegment.from_wav(wav)
                       + AudioSegment.from_wav(wav + "_whitenoise_1.wav"))
            new_wav.export(
                path + "/" + padded_fname + "_padded" + "_" + str(white_noise_duration[0])
                + "_" + str(white_noise_duration[1]) + ".wav",
                format="wav", parameters=["-ar", "16000"])
            # after
            new_wav_reverse = (AudioSegment.from_wav(wav + "_whitenoise_1.wav")
                               + AudioSegment.from_wav(wav)
                               + AudioSegment.from_wav(wav + "_whitenoise_0.wav"))
            new_wav_reverse.export(
                path + "/" + padded_fname + "_padded" + "_" + str(white_noise_duration[1])
                + "_" + str(white_noise_duration[0]) + ".wav",
                format="wav", parameters=["-ar", "16000"])
            # remove white noise wav file
            os.remove(wav + "_whitenoise_0.wav")
            os.remove(wav + "_whitenoise_1.wav")
            wav_files.append(path + "/" + padded_fname + "_padded" + "_"
                             + str(white_noise_duration[0]) + "_"
                             + str(white_noise_duration[1]) + ".wav")
            wav_files.append(path + "/" + padded_fname + "_padded" + "_"
                             + str(white_noise_duration[1]) + "_"
                             + str(white_noise_duration[0]) + ".wav")
            # If adding to one folder, specify the path of the folder!
            # new_wav.export("output_/" + fn + "_padded" + "_" + str(white_noise_duration[0]) + "_" + str(white_noise_duration[1]) + ".wav", format="wav", parameters=["-ar", "16000"])
            # new_wav_reverse.export("output_/" + fn + "_padded" + "_" + str(white_noise_duration[1]) + "_" + str(white_noise_duration[0]) + ".wav", format="wav", parameters=["-ar", "16000"])
            break
    return wav_files
def text_to_speech(self, text):
    tts = gTTS(text=text, lang='en')
    tts.save("temp.mp3")
    output_speech = AudioSegment.from_mp3("temp.mp3")
    play(output_speech)
    os.remove("temp.mp3")
from pydub import AudioSegment

combined = AudioSegment.empty()
for n in range(1, 30):
    path = 'NTV_{}.wav'
    print(path.format(n))
    realpath = path.format(n)
    sound = AudioSegment.from_wav(realpath)
    combined += sound
combined.export("s5.wav", format="wav")
def stereo_to_mono(audio_file_name):
    sound = AudioSegment.from_wav(audio_file_name)
    sound = sound.set_channels(1)
    sound.export(audio_file_name, format="wav")
# convert amp and phase to cartesian

# constants
height = 3
width = 15  # plot sizes
axis = 0
rows = 1
columns = 1
seg_size = 256

# splitting to 2 separate wav files (by milliseconds)
t1 = 0
t2 = 6500
t3 = 13000  # works in milliseconds

# split file in 2
wavFile = AudioSegment.from_wav("bark of the pine tree.wav")
newAudio = wavFile[t1:t2]  # first 6.5 seconds
newAudio.export('part1_before.wav', format="wav")  # exports to a wav file in the current path
newAudio2 = wavFile[t2:t3]
newAudio2.export('part2_before.wav', format="wav")  # exports to a wav file in the current path

# read sample rate and wav data
samplerate, data1 = wavfile.read('part1_before.wav')
samplerate, data2 = wavfile.read('part2_before.wav')
data1 = data1[0:data1.shape[0] - np.mod(data1.shape[0], seg_size)]  # so it divides evenly
data1 = np.split(data1, int(data1.shape[0] / seg_size))  # split into 256-sample arrays
def get_mp3_audio_features(filename):
    '''
    Generates the following audio features for each song:
        - tempo (harmonic, percussive)
        - beats (harmonic, percussive)
        - root mean square energy per segment (mean, median, std)
        - song duration (seconds)
        - engineered tempo features

    INPUT:
        - filename : filepath to single song file (.mp3 or .wav)
    OUTPUT:
        - audio_data : list of 10 extracted audio features
        - y : audio time series (ndarray)
        - sr : sampling rate of y (float)
    '''
    print("Processing: " + filename + " this may take a while ...")
    # Load audio data
    y, sr = librosa.load(filename)
    print("loading file..")
    # Get harmonic and percussive tempo & beats
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    h_tempo, h_beats = librosa.beat.beat_track(y=y_harmonic, sr=sr)
    p_tempo, p_beats = librosa.beat.beat_track(y=y_percussive, sr=sr)
    tempo_differ = h_tempo - p_tempo
    # Double suspiciously slow percussive tempos that agree with the harmonic tempo
    if (p_tempo <= 120) & (tempo_differ == 0):
        slow_tempo_correction = p_tempo * 2
    else:
        slow_tempo_correction = p_tempo
    print(slow_tempo_correction)
    print("Processed tempo & beats")
    # Get root mean squared energy (avg, median & standard deviation)
    rmse_arr = librosa.feature.rmse(y=y)
    avg_rmse = rmse_arr.mean()
    med_rmse = np.ma.median(rmse_arr)
    std_rmse = rmse_arr.std()
    print("Processed RMSEs")
    # Get length of song
    try:
        song = AudioSegment.from_file(filename)
        song_duration = song.duration_seconds
        print("Processed durations")
    except:
        print("error getting song duration")
        song_duration = np.NaN
    audio_data = [
        h_tempo, len(h_beats), p_tempo, len(p_beats), avg_rmse, med_rmse,
        std_rmse, song_duration, tempo_differ, slow_tempo_correction
    ]
    return audio_data, y, sr
def convert_from_mp3_to_wav(self):
    """Convert given audio file from mp3 to wav"""
    wav_file = AudioSegment.from_mp3(self.mp3_file).export(self.wav_file, format="wav")
    log.info('wav file generated: {}'.format(self.wav_filename))
    return wav_file
from pydub import AudioSegment

sound = AudioSegment.from_mp3("audio_files/1.mp3")
sound.export("audio_files/1.wav", format="wav")
def sound_alarm():
    song = AudioSegment.from_wav("firePagerAlert.wav")
    first_second = song[:1000]
    # play the first second, attenuated by 30 dB
    play(first_second - 30)
from pydub import AudioSegment
from pydub.silence import split_on_silence
import xlrd
import os

silence_sound = AudioSegment.from_file('./silence.mp3', format="mp3")


def addSilence(chunk):
    return silence_sound + chunk + silence_sound


def increaseDB(chunk):
    # 0.1 = 290
    # base = 28000
    add = (28000 - chunk.max) / 290 * 0.1
    return chunk + add


def processSentence(sen):
    sen = sen.strip()
    if sen.endswith('.') or sen.endswith('?') or sen.endswith('!'):
        sen = sen[:-1]
    return sen


def exportChunks(chunks):
    for i in range(0, len(chunks)):
        path = "./output/test/{0}.mp3".format(i)
        chunks[i].export(path, format="mp3")
def readAudioFile(path, chunk_seconds=None, stereo=True):
    '''
    This function returns a numpy array that stores the audio samples of a
    specified WAV or AIFF file
    '''
    extension = os.path.splitext(path)[1]
    # print("------" + path)
    try:
        # if extension.lower() == '.wav':
        #     [Fs, x] = wavfile.read(path)
        if extension.lower() == '.aif' or extension.lower() == '.aiff':
            s = aifc.open(path, 'r')
            nframes = s.getnframes()
            strsig = s.readframes(nframes)
            x = numpy.fromstring(strsig, numpy.short).byteswap()
            if stereo:
                x = stereo2mono(x)
            Fs = s.getframerate()
            duration = nframes / float(Fs)
            print("Duration: " + str(duration))
        elif extension.lower() == '.mp3' or extension.lower() == '.wav' or extension.lower() == '.au':
            try:
                print("Trying to make audio file")
                audiofile = AudioSegment.from_file(path)
                Fs = audiofile.frame_rate
                duration = audiofile.duration_seconds
                print("Duration: " + str(duration))
            # except pydub.exceptions.CouldntDecodeError:
            except:
                print("Error: file not found or other I/O error. (DECODING FAILED)")
                return (-1, -1)
            if audiofile.sample_width == 2:
                data = numpy.fromstring(audiofile._data, numpy.int16)
            elif audiofile.sample_width == 4:
                data = numpy.fromstring(audiofile._data, numpy.int32)
            else:
                return (-1, -1)
            # print("Audio data type: " + str(data.shape))
            # print("Length: " + str(len(data)))
            if chunk_seconds:
                return_data = []
                num_chunks = int(duration / chunk_seconds)
                # print("Number of chunks: " + str(num_chunks))
                chunk_length = int(len(data) / num_chunks)
                for i in range(num_chunks):
                    chunked_data = data[i * chunk_length:(i + 1) * chunk_length]
                    x = []
                    for chn in range(audiofile.channels):
                        x.append(chunked_data[chn::audiofile.channels])
                    x = numpy.array(x).T
                    if x.ndim == 2:
                        if x.shape[1] == 1:
                            x = x.flatten()
                    return_data.append((Fs, x))
                # print("Return data length: " + str(len(return_data)))
                return return_data
            else:
                x = []
                for chn in range(audiofile.channels):
                    x.append(data[chn::audiofile.channels])
                x = numpy.array(x).T
        else:
            print("Error in readAudioFile(): Unknown file type!")
            return (-1, -1)
    except IOError:
        print("Error: file not found or other I/O error.")
        return (-1, -1)
    if x.ndim == 2:
        if x.shape[1] == 1:
            x = x.flatten()
    return (Fs, x)
def loadData(self, database=None):
    if database is None:
        for db in self.databases:
            self.loadData(database=db)
    else:
        counter = 0
        error = 0
        cache_data = []
        cache_labels = []
        if self._isCached(database):
            data = np.load(f"cache/{database}.npy", allow_pickle=True)
            # The two randomly chosen actors are held out as an external validation set.
            if database.split("_")[0] == "actor" and (
                    int(database.split("_")[1]) == self.random_1
                    or int(database.split("_")[1]) == self.random_2):
                for element in data[0]:
                    self.x_test2.append(element)
                for element in data[1]:
                    self.y_test2.append(element)
                print(f"[+] [{len(data[0])}] Loaded {database} from cache and added to the external validation set")
            else:
                for element in data[0]:
                    self.data.append(element)
                for element in data[1]:
                    self.labels.append(element)
                print(f"[+] [{len(data[0])}] Loaded {database} from cache")
        else:
            for f in os.listdir("audio/" + database):
                try:
                    s1 = AudioSegment.from_file(f"audio/{database}/{f}", format="wav")
                    s2 = audiosegment.from_file(f"audio/{database}/{f}")
                except FileNotFoundError:
                    error += 1
                    print(f"[+] [{counter} ({error})] {database}/{f}\t\t\t", end="\r")
                    continue
                freqChange, freqAvg, freqMax = self._getFrequency(s2)
                fdata = self._parseName(f)
                loudPoly, dBFS, maxDBFS = self._getLoudness(s1)
                if database.split("_")[0] == "actor" and (
                        int(database.split("_")[1]) == self.random_1
                        or int(database.split("_")[1]) == self.random_2):
                    self.x_test2.append(
                        np.append(
                            np.concatenate((self._getDerivative(loudPoly),
                                            self._getDerivative(freqChange))),
                            [dBFS, maxDBFS, freqAvg, freqMax]))
                    self.y_test2.append([fdata.get("emotion_n"), fdata.get("actor_n")])
                else:
                    self.data.append(
                        np.append(
                            np.concatenate((self._getDerivative(loudPoly),
                                            self._getDerivative(freqChange))),
                            [dBFS, maxDBFS, freqAvg, freqMax]))
                    self.labels.append([fdata.get("emotion_n"), fdata.get("actor_n")])
                cache_data.append(
                    np.append(
                        np.concatenate((self._getDerivative(loudPoly),
                                        self._getDerivative(freqChange))),
                        [dBFS, maxDBFS, freqAvg, freqMax]))
                cache_labels.append([fdata.get("emotion_n"), fdata.get("actor_n")])
                counter += 1
                print(f"[+] [{counter} ({error})] Loading {database}/{f}...\t\t\t", end="\r")
            print(f"[+] [{counter} ({error})] Finished loading {database}\t\t\t\n", end="\r")
            self._cacheData(database, [cache_data, cache_labels])
        self.databases_loaded.append(database)
def disgust_audio():
    print("Playing sound...")
    sound = AudioSegment.from_file("../audios/Disgust_1.wav")
    # apply_gain returns a new segment; the result must be assigned
    sound = sound.apply_gain(100)
    play(sound)
    return
import wave
from pydub import AudioSegment
import glob

length = 7.000000000
files = glob.glob('/home/dell/Desktop/Project/PDb/*')
for pth in files:
    f = wave.open(pth, 'r')
    frames = f.getnframes()
    rate = f.getframerate()
    duration = frames / float(rate)
    f.close()
    # Pad each clip with silence up to the 7-second target length.
    dur = length - duration
    pad_ms = dur * 1000
    silence = AudioSegment.silent(duration=pad_ms)
    audio = AudioSegment.from_wav(pth)
    padded = audio + silence
    padded.export('/home/dell/Desktop/Project/PDc/' + pth[31:], format='wav')
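# Caveat (my addition): a clip longer than 7 s makes pad_ms negative, and
# AudioSegment.silent() then yields an empty segment, silently leaving the
# file short. A minimal guard for the loop body above:
pad_ms = max(0, (length - duration) * 1000)
padded = (AudioSegment.from_wav(pth) + AudioSegment.silent(duration=pad_ms))[:int(length * 1000)]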
import cv2
import numpy as np
import glob
import os
from pathlib import Path
from moviepy.editor import *
from pydub import AudioSegment
from silence import detect_silence, detect_nonsilent
import shutil
from speechgen import getspeech

data_folder = Path("video/")
getspeech()
track = AudioSegment.from_wav("TTSOutput.wav")
img_array = []
for filename in glob.glob('assets/*.jpg'):
    img = cv2.imread(filename)
    height, width, layers = img.shape
    size = (width, height)
    img_array.append(img)
out = cv2.VideoWriter('still.mp4', cv2.VideoWriter_fourcc(*'MP4V'), 1, size)
for i in range(len(img_array)):
    out.write(img_array[i])
out.release()
arr = detect_silence(track)
vocal = detect_nonsilent(track)
print(arr, vocal)
def normalized_sound(audio_file):
    audio = AudioSegment.from_wav(audio_file)
    normalizedsound = effects.normalize(audio)
    return normalizedsound
def save_soundtrack(noises, duration=50_000):
    quiet = AudioSegment.silent(duration=duration)
    soundtrack = reduce(lambda a, b: a.overlay(b), noises, quiet)
    soundtrack.export(SOUNDTRACK_FILE, format="wav")
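# Usage sketch for save_soundtrack() (my addition), with layers built from
# pydub's tone/noise generators; SOUNDTRACK_FILE is the module constant used above.
from pydub.generators import Sine, WhiteNoise

noises = [
    Sine(220).to_audio_segment(duration=50_000).apply_gain(-20),     # quiet hum
    WhiteNoise().to_audio_segment(duration=50_000).apply_gain(-30),  # hiss bed
]
save_soundtrack(noises)  # layers both onto the silent base and writes the wav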
#!/usr/bin/python3
# coding: utf-8

from pydub import AudioSegment
from pydub.silence import split_on_silence
import random
import os

file = '/home/gswyhq/data/五十音.mp3'
EXPORT_PATH = '/home/gswyhq/data/五十音图'

time_start = "00:16"
time_end = "01:35"
song = AudioSegment.from_mp3(file)
# Cut times are in milliseconds, so the mm:ss stamps are converted down to ms.
start = (int(time_start.split(':')[0]) * 60 + int(time_start.split(':')[1])) * 1000
end = (int(time_end.split(':')[0]) * 60 + int(time_end.split(':')[1])) * 1000
# print(start, end)
word = song[start:end]

# silence_thresh treats anything below -42 dBFS as silence, and that level must
# hold for more than 700 ms to count as a gap; that is how the clip is split
# into pieces. These two values are the key tuning knobs. foobar's
# View -> Visualizations -> VU meter helps estimate them: normal speech sits
# around -25 dBFS to -10 dBFS on the -96 dBFS..0 dBFS scale (closer to 0 is
# louder), so below -42 dBFS is taken as silence. foobar also suggests the gap
# between words is roughly 900 ms (0.9 s); we split on a slightly smaller 0.7 s.
words = split_on_silence(word, min_silence_len=700, silence_thresh=-42)

# Next, generate a shuffled order, map the words into it, and insert 1 s of
# silence between them.
silent = AudioSegment.silent(duration=1000)
print("Split into {} sounds".format(len(words)))
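# Sketch of the shuffling step the comment above describes (my addition; the
# original stops before it). The output file name is hypothetical.
order = list(range(len(words)))
random.shuffle(order)
quiz = AudioSegment.empty()
for idx in order:
    quiz += words[idx] + silent
os.makedirs(EXPORT_PATH, exist_ok=True)
quiz.export(os.path.join(EXPORT_PATH, 'shuffled.mp3'), format='mp3')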
def stretch_audio(audio, filepath, stretch_constant):
    audio.export(filepath, format="wav")
    y, sr = librosa.load(filepath, sr=None)
    y_stretched = pyrubberband.time_stretch(y, sr, stretch_constant)
    sf.write(filepath, y_stretched, sr, format='wav')
    return AudioSegment.from_file(filepath, format="wav")
async def mp4ToWav(fileName):
    inPath = f'{youtubeDir}/{fileName}.mp4'
    outPath = f'{wavDir}/{fileName}.wav'
    audio = AudioSegment.from_file(inPath)
    audio.export(outPath, format='wav')
async def wavToMp4(fileName):
    inPath = f'{processedDir}/{fileName}.wav'
    outPath = f'{streamDir}/{fileName}.mp4'
    audio = AudioSegment.from_file(inPath)
    audio.export(outPath, format='mp4')
def generate_video_for_id(id):
    et_dataset_location = f'{reflacx_dataset_location}/main_data/'
    mimic_dataset_location = mimiccxr_images_location
    table_et_pt1 = pd.read_csv(f'{et_dataset_location}/{id}/fixations.csv')
    table_text = pd.read_csv(f'{et_dataset_location}/{id}/timestamps_transcription.csv')
    main_table = pd.read_csv(f'{et_dataset_location}/metadata_phase_{id[1]}.csv')
    image_filepath = main_table[main_table['id'] == id]['image'].values
    assert (len(image_filepath) == 1)
    max_time_fixation = max(table_et_pt1['timestamp_end_fixation'].values)
    max_time_text = max(table_text['timestamp_end_word'].values)
    dicom_array, _, _ = open_dicom(f'{mimic_dataset_location}/{image_filepath[0]}')
    dicom_array = dicom_array * 255
    from skimage.transform import resize
    dicom_array = resize(dicom_array,
                         (int(dicom_array.shape[0] * scale_video),
                          int(dicom_array.shape[1] * scale_video)),
                         anti_aliasing=True)
    dicom_array = dicom_array.astype(np.uint8)
    # generate a clip with the original dicom as every frame
    my_clip = ImageClip(np.stack((dicom_array,) * 3, axis=-1)).set_duration(
        max([max_time_fixation, max_time_text])).set_fps(fps)
    # modify every frame of the video according to the fixations and transcription tables
    my_clip = fl(my_clip,
                 lambda get_frame, t: scroll(get_frame, t, table_et_pt1, table_text))
    # generate the audio from the timestamped transcription
    full_audio = AudioSegment.empty()
    previous_end = 0
    for _, row in table_text.iterrows():
        # if start and end of the word are at the same time, it was not captured by
        # the original transcription, so we do not use it in the audio, only in the subtitle
        if row['timestamp_start_word'] == row['timestamp_end_word']:
            continue
        print(row['word'])
        # text to speech
        tts = SaveTTSFile('./create_video_temp.wav')
        tts.start(row['word'].replace('.', 'period').replace(',', 'comma').replace('/', 'slash'),
                  row['timestamp_start_word'], row['timestamp_end_word'])
        # wait up to 10 s for the TTS output to appear on disk, then fail hard
        for i in range(10):
            if not os.path.exists('./create_video_temp.wav'):
                time.sleep(1)
            else:
                break
        assert os.path.exists('./create_video_temp.wav')
        del tts
        # add silence between words if they did not end/start at the same time
        if row['timestamp_start_word'] > previous_end:
            full_audio += AudioSegment.silent(
                duration=(row['timestamp_start_word'] - previous_end) * 1000)
            print(full_audio.duration_seconds)
            print(row['timestamp_start_word'])
            assert (abs(full_audio.duration_seconds - row['timestamp_start_word']) < 0.005)
        # change the duration of the word sound to the duration it took for the
        # radiologist to say it
        word_audio = AudioSegment.from_file('./create_video_temp.wav', format="wav")
        word_audio = stretch_audio(
            word_audio, './create_video_temp.wav',
            word_audio.duration_seconds /
            (row['timestamp_end_word'] - row['timestamp_start_word']))
        os.remove('./create_video_temp.wav')
        full_audio += word_audio
        assert (abs(full_audio.duration_seconds - row['timestamp_end_word']) < 0.005)
        previous_end = row['timestamp_end_word']
    full_audio.export("./create_video_temp.wav", format="wav")
    audio_background = mpe.AudioFileClip('./create_video_temp.wav')
    my_clip = my_clip.set_audio(audio_background)
    my_clip.write_videofile(f"movie_{id}.mp4", audio_codec='aac', codec="libx264",
                            temp_audiofile='temp-audio.m4a', remove_temp=True,
                            fps=30, bitrate="5000k")
    os.remove('./create_video_temp.wav')
def mp3_to_wav(audio_file_name):
    if audio_file_name.split('.')[1] == 'mp3':
        sound = AudioSegment.from_mp3(audio_file_name)
        audio_file_name = audio_file_name.split('.')[0] + '.wav'
        sound.export(audio_file_name, format="wav")
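# Variant (my addition): split('.') misparses names like 'take.1.mp3' or paths
# with dotted directories; os.path.splitext handles those cases. The function
# name is hypothetical.
import os

def mp3_to_wav_safe(audio_file_name):
    base, ext = os.path.splitext(audio_file_name)
    if ext == '.mp3':
        AudioSegment.from_mp3(audio_file_name).export(base + '.wav', format="wav")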