def _append_sine(sound, freq, duration_ms, fade_ms, sr):
    """Append a sine segment lasting ``duration_ms`` to ``sound``.

    The generated segment is lengthened by the crossfade so that, after the
    overlap, ``sound`` grows by exactly ``duration_ms``. A frequency of 0
    yields silence.
    """
    # pydub raises ValueError when the crossfade is longer than the segment
    # being appended to (e.g. an empty lead-in because the first note starts
    # at frame 0), so never fade over more audio than already exists.
    fade_ms = min(fade_ms, len(sound))
    segment = Sine(freq, sample_rate=sr).to_audio_segment(
        duration=duration_ms + fade_ms)
    return sound.append(segment, crossfade=fade_ms)


def genStereo(notes, audio_file, hop_length=128, sr=48000, crossfade=25,
              silencefade=15):
    """Synthesize ``notes`` as sine tones and mix them with a recording.

    The synthesized melody goes to the left channel and the audio loaded
    from ``audio_file`` to the right channel of a stereo WAV written under
    ``./wav2wavmix/``.

    Args:
        notes: Non-empty indexable sequence of notes. Each note is indexed
            as ``note[0]`` (onset frame), ``note[1]`` (offset frame) and
            ``note[2]`` (pitch, converted with ``pitch2freq``). Notes are
            presumably sorted and non-overlapping -- TODO confirm with
            callers.
        audio_file: Path of the WAV file to place on the right channel.
        hop_length: Samples per frame; with ``sr`` it converts frame indices
            to milliseconds.
        sr: Sample rate used for frame conversion and for all generated
            segments.
        crossfade: Crossfade in milliseconds when appending a tone.
        silencefade: Crossfade in milliseconds when appending a silent gap.

    Raises:
        IndexError: If ``notes`` is empty.
    """
    ms_per_frame = hop_length * 1000.0 / sr

    # Silent lead-in up to the onset of the first note.
    sound = Sine(0, sample_rate=sr).to_audio_segment(
        duration=notes[0][0] * ms_per_frame)

    last_index = len(notes) - 1
    for index, note in enumerate(notes):
        onset, offset, pitch = note[0], note[1], note[2]
        sound = _append_sine(sound, pitch2freq(pitch),
                             (offset - onset) * ms_per_frame, crossfade, sr)
        if index == last_index:
            break
        # Fill the gap (if any) before the next note with silence.
        gap = notes[index + 1][0] - offset
        if gap != 0:
            sound = _append_sine(sound, 0, gap * ms_per_frame,
                                 silencefade, sr)

    reference = AudioSegment.from_wav(audio_file)
    # Both channels are laid over the same silent canvas (recording length
    # plus 100 ms) so they end up with equal duration.
    canvas = AudioSegment.silent(duration=len(reference) + 100)
    left = canvas.overlay(sound, gain_during_overlay=-8)
    right = canvas.overlay(reference, gain_during_overlay=-8)
    stereo_sound = AudioSegment.from_mono_audiosegments(left, right)

    # NOTE(review): the slice drops the first 6 and last 4 characters of the
    # path -- presumably a fixed directory prefix and the ".wav" suffix;
    # confirm against the paths callers actually pass in.
    filename = './wav2wavmix/' + audio_file[6:-4] + '_mix_conti_v4.wav'
    stereo_sound.export(filename, format="wav", bitrate="48k")
    print('stereo sound file generated!')
def SpeakLongText(long_text, max_text_length=GOOGLE_MAX_TEXT_LENGTH):
    """Convert a full-length text into a single MP3 file.

    The text is split with ``SplitTextToShortTexts``, each piece is rendered
    by ``SpeakShortText`` in a worker process, and the resulting clips are
    concatenated after a short 300 Hz lead-in tone.

    Args:
        long_text: The text to speak.
        max_text_length: Maximum length passed to ``SplitTextToShortTexts``.

    Returns:
        The ``tempfile.NamedTemporaryFile`` object (created with
        ``delete=False``) whose ``.name`` holds the exported MP3. The file
        is not removed automatically; the caller owns it.
    """
    # Split the long_text into short_texts small enough to TTS
    short_texts = list(SplitTextToShortTexts(long_text, max_text_length))

    # The per-piece MP3s live in a temporary directory that is removed as
    # soon as they have been merged.
    with tempfile.TemporaryDirectory() as temp_dir:
        # NOTE: Google's text to speech library creates a TCP connection for
        # each request but does not close it. These even stay open in the
        # background after the Client is de-referenced (?!). Each one uses a
        # file descriptor, so for a large book we hit the max file
        # descriptors limit and crash. Running each TTS in its own process
        # guarantees that at least at the end of the chapter, all will be
        # de-allocated.
        #
        # The pool size caps the number of concurrent requests, and leaving
        # the ``with`` block shuts the pool down and waits for the workers,
        # so no event loop, semaphore or explicit shutdown() is needed.
        with concurrent.futures.ProcessPoolExecutor(
                max_workers=MAX_CONCURRENT_GOOGLE_API_REQUESTS) as executor:
            # Executor.map yields results in input order, which keeps the
            # clips in reading order.
            mp3s_of_short_texts = list(
                executor.map(SpeakShortText, short_texts,
                             [temp_dir] * len(short_texts)))

        # Combine the short_texts into a single mp3, starting with a
        # 500 ms, 300 Hz tone. This must happen while temp_dir still exists.
        mp3_long_text = Sine(300).to_audio_segment(duration=500)
        for mp3_short_text in mp3s_of_short_texts:
            mp3_long_text = mp3_long_text.append(
                AudioSegment.from_mp3(mp3_short_text))

    # Return the full MP3 (as a temporary file)
    temporary_mp3 = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    mp3_long_text.export(temporary_mp3.name, format="mp3")
    return temporary_mp3
def text_to_audio(text, file_name, export_file_format, codec=None,
                  frequency=700, wpm=10, cross_fade=2):
    """Render ``text`` as a sequence of tone segments and export it.

    Args:
        text: Sentence handed to ``sentence_to_intervals``.
        file_name: Destination passed to the segment's ``export``.
        export_file_format: Output container format, e.g. ``"ogg"``.
        codec: Optional codec forwarded to ``export``, e.g. ``"opus"``.
        frequency: Frequency forwarded to ``interval_to_wave_data_segment``.
        wpm: Speed, converted with ``wpm_to_unit_length_seconds``.
        cross_fade: Crossfade in milliseconds between consecutive segments;
            also the length of the silent lead-in.
    """
    unit_seconds = wpm_to_unit_length_seconds(wpm)
    pieces = sentence_to_intervals(text)

    # Start from a short silence so the first real segment has something to
    # cross-fade with.
    audio = Sine(0).to_audio_segment(cross_fade)
    for piece in pieces:
        wave = interval_to_wave_data_segment(piece, frequency, unit_seconds)
        audio = audio.append(wave, crossfade=cross_fade)

    audio.export(file_name, format=export_file_format, codec=codec)
class JukeBox:
    """Generate (and optionally play) a procedural melody from sine tones.

    After construction ``self.scale`` is the list of note frequencies in Hz
    and ``self.sound`` is the generated audio segment.

    Note that ``scale()`` stores the frequency list in an instance attribute
    of the same name, so on an instance the method can only be called once;
    this is kept because callers may read ``self.scale`` as a list.
    """

    # Equal-tempered standard piano tuning note frequencies (Hz), C3 to E6.
    _NOTE_FREQUENCIES = {
        'c3': 130.81, 'c3sharp': 138.59, 'd3': 146.83, 'd3sharp': 155.56,
        'e3': 164.81, 'f3': 174.61, 'f3sharp': 185.00, 'g3': 196.00,
        'g3sharp': 207.65, 'a3': 220.00, 'a3sharp': 233.08, 'b3': 246.94,
        'c4': 261.63, 'c4sharp': 277.18, 'd4': 293.66, 'd4sharp': 311.13,
        'e4': 329.63, 'f4': 349.23, 'f4sharp': 369.99, 'g4': 392.00,
        'g4sharp': 415.30, 'a4': 440.00, 'a4sharp': 466.16, 'b4': 493.88,
        'c5': 523.25, 'c5sharp': 554.37, 'd5': 587.33, 'd5sharp': 622.25,
        'e5': 659.25, 'f5': 698.46, 'f5sharp': 739.99, 'g5': 783.99,
        'g5sharp': 830.61, 'a5': 880.00, 'a5sharp': 932.33, 'b5': 987.77,
        'c6': 1046.50, 'c6sharp': 1108.73, 'd6': 1174.66, 'd6sharp': 1244.51,
        'e6': 1318.51,
    }

    # Supported scales, as note names in ascending order.
    _SCALE_NOTES = {
        # 15 notes
        'cpent': ('c3', 'd3', 'e3', 'g3', 'a3',
                  'c4', 'd4', 'e4', 'g4', 'a4',
                  'c5', 'd5', 'e5', 'g5', 'a5'),
        # 18 notes
        'cblues': ('c3', 'd3sharp', 'f3', 'f3sharp', 'g3', 'a3sharp',
                   'c4', 'd4sharp', 'f4', 'f4sharp', 'g4', 'a4sharp',
                   'c5', 'd5sharp', 'f5', 'f5sharp', 'g5', 'a5sharp'),
    }

    # Rhythm classes the 'random' profile can draw, indexed by the roll.
    _RHYTHM_CLASSES = (
        'single', 'up_triplet', 'down_triplet', 'turn', 'reverse_turn',
        'up_accia', 'down_accia', 'up_scale', 'down_scale', 'up_thirds',
        'down_thirds', 'lower_thrill_slow', 'upper_thrill_slow',
    )

    # One repetition of the 'alarm' profile.
    _ALARM_PATTERN = (
        'up_triplet', 'up_triplet', 'pause',
        'down_triplet', 'down_triplet', 'pause',
        'down_thirds', 'pause',
    )

    def __init__(self, tempo=300, length=40, scale='cpent', profile='random',
                 playmusic=False):
        """Build the scale, generate the melody and optionally play it.

        Args:
            tempo: Base note duration in milliseconds.
            length: Number of rhythm draws ('random') or pattern
                repetitions ('alarm').
            scale: Scale name, ``'cpent'`` or ``'cblues'``.
            profile: ``'random'`` or ``'alarm'``.
            playmusic: Play the melody immediately when true.
        """
        self.scale(scale=scale)
        self.tempo = tempo
        self.length = length
        print("Loading Melodies")
        self.melodygen(tempo=self.tempo, length=self.length, profile=profile)
        if playmusic:
            self.playmusic()

    def melodygen(self, tempo, length, profile='random', p_unity=40,
                  repetition_rate=13, crossfd=30):
        """Generate the melody into ``self.sound`` and return it.

        Args:
            tempo: Base note duration in milliseconds.
            length: Number of iterations of the chosen profile.
            profile: ``'random'`` draws a rhythm class and origin note per
                iteration; ``'alarm'`` repeats a fixed pattern. Any other
                value leaves only the silent lead-in.
            p_unity: Upper bound (inclusive) of the rhythm roll. Rolls past
                the end of the rhythm table fall back to ``'single'``, so a
                larger value makes single notes more likely.
            repetition_rate: Every ``repetition_rate``-th iteration the
                melody so far is appended to itself.
            crossfd: Crossfade in milliseconds between notes.
        """
        # Silent lead-in so the first note has something to crossfade with.
        self.sound = self.notegen(0, tempo)

        if profile == 'random':
            for x in range(length):
                # random rhythm class selector
                roll = random2.randint(0, p_unity)
                # random origin note selector
                origin = random2.randint(0, len(self.scale) - 1)
                if roll >= len(self._RHYTHM_CLASSES):
                    rclass = 'single'
                else:
                    rclass = self._RHYTHM_CLASSES[roll]
                self.rhythmgen(rclass=rclass, note_index=origin, tempo=tempo,
                               crossfd=crossfd)
                # Provides stacking repetition for *some* structure
                if x % repetition_rate == 0 and x > 0:
                    self.sound = self.sound.append(self.sound)
        elif profile == 'alarm':
            for _ in range(length):
                for rclass in self._ALARM_PATTERN:
                    self.rhythmgen(rclass=rclass, note_index=4, tempo=tempo,
                                   crossfd=crossfd)
        return self.sound

    def playmusic(self, quiet=False):
        """Announce and, unless ``quiet``, play ``self.sound``."""
        print('Now playing: Debussys greatest hits')
        if not quiet:
            with noALSAerror():
                play(self.sound)

    def notegen(self, note, duration, timbre='marimba'):
        """Return a single note as an audio segment.

        This does not modify ``self.sound``; callers append the result.

        Args:
            note: Frequency in Hz (0 produces silence).
            duration: Length in milliseconds.
            timbre: ``'pure'`` for a plain sine, ``'marimba'`` to overlay
                quieter partials at 4x and 10x the frequency.

        Raises:
            ValueError: If ``timbre`` is not a known timbre.
        """
        if timbre not in ('marimba', 'pure'):
            raise ValueError('unknown timbre %r' % (timbre,))

        tone = Sine(freq=note).to_audio_segment(duration=duration)
        if timbre == 'marimba':
            for multiple, volume in ((4, -35), (10, -40)):
                tone = tone.overlay(
                    Sine(freq=note * multiple).to_audio_segment(
                        duration=duration, volume=volume))
        return tone

    def scale(self, scale):
        """Set ``self.scale`` to the frequency list of the named scale.

        Raises:
            ValueError: If ``scale`` is not a known scale name.
        """
        try:
            note_names = self._SCALE_NOTES[scale]
        except KeyError:
            raise ValueError('unknown scale %r' % (scale,)) from None
        self.scale = [self._NOTE_FREQUENCIES[name] for name in note_names]

    def _rhythm_steps(self, rclass, note_index, tempo):
        """Return ``[(scale_index, duration_ms), ...]`` for a rhythm class.

        Patterns are mirrored near the ends of the scale so that indices
        stay in range. Unknown classes (and ``'pause'``, which is not made
        of scale notes) yield an empty list.
        """
        size = len(self.scale)
        at_bottom = note_index == 0
        at_top = note_index == size - 1
        third = tempo / 3
        half = tempo / 2

        # Whole-scale runs ignore the origin note.
        if rclass == 'up_scale':
            return [(index, half) for index in range(size)]
        if rclass == 'down_scale':
            return [(index, half) for index in range(size - 1, -1, -1)]

        # Acciaccaturas: a short note followed by one twice as long.
        if rclass in ('up_accia', 'down_accia'):
            if rclass == 'up_accia':
                offsets = (-1, 0) if at_top else (0, 1)
            else:
                offsets = (1, 0) if at_bottom else (0, -1)
            durations = (third, (tempo * 2) / 3)
            return [(note_index + offset, duration)
                    for offset, duration in zip(offsets, durations)]

        if rclass == 'single':
            offsets, duration = (0,), tempo
        elif rclass == 'up_triplet':
            offsets = (-2, -1, 0) if note_index >= size - 2 else (0, 1, 2)
            duration = third
        elif rclass == 'down_triplet':
            offsets = (2, 1, 0) if note_index <= 1 else (0, -1, -2)
            duration = third
        elif rclass == 'turn':
            if at_bottom:
                # NOTE(review): this branch uses tempo / 3 while the other
                # two use tempo / 2; kept as in the original.
                offsets, duration = (1, 0, 1, 2), third
            elif at_top:
                offsets, duration = (-1, -2, -1, 0), half
            else:
                offsets, duration = (0, -1, 0, 1), half
        elif rclass == 'reverse_turn':
            if at_bottom:
                offsets = (2, 1, 0, 1)
            elif at_top:
                offsets = (-2, -1, 0, -1)
            else:
                offsets = (0, 1, 0, -1)
            duration = half
        elif rclass == 'up_thirds':
            # Ascending in broken thirds.
            if note_index >= size - 5:
                # NOTE(review): only four notes here, while every other
                # thirds pattern has eight; kept as in the original.
                offsets = (-5, -3, -4, -2)
            else:
                offsets = (0, 2, 1, 3, 2, 4, 3, 5)
            duration = half
        elif rclass == 'down_thirds':
            if note_index < 5:
                offsets = (5, 3, 4, 2, 3, 1, 2, 0)
            else:
                offsets = (0, -2, -1, -3, -2, -4, -3, -5)
            duration = half
        elif rclass == 'upper_thrill_slow':
            offsets = (-1, 0, -1, 0) if at_top else (0, 1, 0, 1)
            duration = half
        elif rclass == 'lower_thrill_slow':
            offsets = (1, 0, 1, 0) if at_bottom else (0, -1, 0, -1)
            duration = half
        else:
            return []

        return [(note_index + offset, duration) for offset in offsets]

    def rhythmgen(self, rclass, note_index, tempo, crossfd=30):
        """Append one rhythm figure to ``self.sound`` and return the result.

        Args:
            rclass: Rhythm class name; unknown names append nothing.
            note_index: Index into ``self.scale`` of the origin note.
            tempo: Base note duration in milliseconds.
            crossfd: Crossfade in milliseconds between notes.
        """
        if rclass == 'pause':
            # A rest: four beats of silence.
            notes = [(0, tempo * 4)]
        else:
            notes = [(self.scale[index], duration)
                     for index, duration in self._rhythm_steps(
                         rclass, note_index, tempo)]

        for frequency, duration in notes:
            self.sound = self.sound.append(
                self.notegen(frequency, duration), crossfade=crossfd)
        return self.sound

    def __enter__(self):
        return self

    def __exit__(self, e_type, e_val, traceback):
        pass