def make_espeak(text, lang, max_length):
    """Synthesize ``text`` with a ``Voice`` and measure the resulting audio.

    The voice language is looked up in ``gtts_to_espeak``. When ``lang`` has
    no entry there, an ``"en"`` voice is used instead; note that this
    fallback voice is built without the ``volume=2`` setting that mapped
    languages receive.

    Args:
        text: Input handed to ``Voice.to_audio``.
        lang: Key looked up in ``gtts_to_espeak``.
        max_length: Not used by this function.

    Returns:
        A ``(wav, audio_length)`` tuple: the value returned by
        ``Voice.to_audio`` and ``len()`` of the decoded segment divided by
        1000 (presumably seconds, assuming the segment length is expressed
        in milliseconds -- confirm).
    """
    if lang in gtts_to_espeak:
        voice = Voice(lang=gtts_to_espeak[lang], speed=130, volume=2)
    else:
        voice = Voice(lang="en", speed=130)

    wav = voice.to_audio(text)

    # Decode the rendered bytes only to find out how long the audio is.
    segment = AudioSegment.from_file_using_temporary_files(BytesIO(wav))
    return wav, len(segment) / 1000
class TestEffects(unittest.TestCase):
    """Smoke tests: every available effect must return a non-None result.

    Effects listed in ``AVAILABLE_EFFECTS`` are instantiated once per test
    and grouped by the base classes they are instances of (``PhonemicEffect``,
    ``TextEffect``, ``AudioEffect``, ``VoiceEffect``). An effect that is an
    instance of several of these classes lands in each matching group.
    """

    text = "Les écoute pas ces sheitane c moi le vrai crysw"

    def setUp(self):
        effect_families = (PhonemicEffect, TextEffect, AudioEffect, VoiceEffect)
        self.effects = {family: set() for family in effect_families}

        # set() removes duplicate classes so each one is instantiated once.
        for effect_cls in set(AVAILABLE_EFFECTS):
            instance = effect_cls()
            for family, members in self.effects.items():
                if isinstance(instance, family):
                    members.add(instance)

        self.voice = Voice(lang="fr")

    def test_text_effects(self):
        # Text effects are fed the raw sample sentence.
        for text_effect in self.effects[TextEffect]:
            processed = text_effect.process(self.text)
            self.assertIsNotNone(processed)

    def test_phonemic_effects(self):
        # Phonemic effects are fed the phonemes produced for the sample sentence.
        phonemes = self.voice.to_phonemes(self.text)
        for phonemic_effect in self.effects[PhonemicEffect]:
            processed = phonemic_effect.process(phonemes)
            self.assertIsNotNone(processed)

    def test_audio_effects(self):
        # Audio effects are fed the second element of what
        # AudioRenderer.to_f32_16k yields for the rendered sample sentence.
        wav = self.voice.to_audio(self.text)
        loop = get_event_loop()
        converted = loop.run_until_complete(AudioRenderer.to_f32_16k(wav))
        _, wav_array = converted
        for audio_effect in self.effects[AudioEffect]:
            processed = audio_effect.process(wav_array)
            self.assertIsNotNone(processed)

    def test_voice_effects(self):
        # Voice effects are fed parameters derived from a fixed MD5 digest.
        seed = "parce que nous we".encode('utf8')
        cookie_hash = md5(seed).digest()
        voice_params = VoiceParameters.from_cookie_hash(cookie_hash)
        for voice_effect in self.effects[VoiceEffect]:
            processed = voice_effect.process(voice_params)
            self.assertIsNotNone(processed)
# Render a chord progression as sung "a" phonemes, optionally mix it over a
# looped beats track, then play the result.

# Length of one beat: whole track length divided by (rate * beats in the track).
# NOTE(review): presumably seconds, given the "* 1000" + "ms" logging below -- confirm.
beat_duration = len(beats_track) / (rate * BEATS_TRACK_BEAT_COUNT)

# Repeat the beats track so it spans every beat of every loop of the progression.
total_beats = LOOPS_COUNT * BEATS_PER_MEASURE * len(CHORDS_PROGRESSION)
beats_track_looped = np.tile(beats_track, total_beats // BEATS_TRACK_BEAT_COUNT)

beat_ms = beat_duration * 1000
logging.info("Beat time : %dms" % beat_ms)
# NOTE(review): the factor 4 is hard-coded here instead of BEATS_PER_MEASURE -- confirm.
logging.info("Measure time : %dms" % (beat_ms * 4))

prog_freqs = get_trinote_progression_freqs(CHORDS_PROGRESSION)
logging.info("First freq progression: \n %s" % str(prog_freqs))

track_freqs = prog_freqs * LOOPS_COUNT

# One "a" phoneme per frequency, lasting one beat, with the same pitch value
# at both pitch points (0 and 100).
phoneme_ms = int(beat_ms)
progression_phonems = PhonemeList([])
for freq in track_freqs:
    pitch_points = [(0, freq), (100, freq)]
    progression_phonems.append(Phoneme("a", phoneme_ms, pitch_points))

logging.info("Rendering audio")
voice = Voice(lang="fr", voice_id=2)
wav = voice.to_audio(progression_phonems)

if BEATS_TRACK_FILE is not None:
    # Mix the scaled beats (0.6) with the scaled voice (0.3) and re-encode to WAV.
    rate, wave_array = to_f32_16k(wav)
    mixed_tracks = mix_tracks(
        beats_track_looped * 0.6,
        wave_array * 0.3,
        align="left",
    )
    wav = to_wav_bytes(mixed_tracks, 16000)

player = AudioPlayer()
player.set_file(BytesIO(wav))
player.play()
player.close()
# Render two chained chord progressions as sung "w" phonemes, optionally mix
# them over a looped beats track, then play the result.

# Repeat the beats track so it spans every beat of the whole arrangement.
beats_track_looped = np.tile(
    beats_track,
    PROG_LOOP_COUNT * BEATS_PER_MEASURE * FULL_LOOP_COUNT
    * (FIRST_PROG_REPEAT + SECOND_PROG_REPEAT) // BEATS_TRACK_BEAT_COUNT,
)

logging.info("Beat time : %dms", beat_duration * 1000)
# NOTE(review): the factor 4 is hard-coded here instead of BEATS_PER_MEASURE -- confirm.
logging.info("Measure time : %dms", beat_duration * 1000 * 4)

first_prog_freqs = get_trinote_progression_freqs(FIRST_CHORDS_PROGRESSION)
logging.info("First freq progression: \n %s", str(first_prog_freqs))

second_prog_freqs = get_trinote_progression_freqs(SECOND_CHORDS_PROGRESSION)
# Fixed label: this entry was previously logged as "First freq progression",
# which made the two progressions indistinguishable in the log.
logging.info("Second freq progression: \n %s", str(second_prog_freqs))

# One full pass: the first progression repeated, followed by the second repeated.
track_freqs = (first_prog_freqs * FIRST_PROG_REPEAT
               + second_prog_freqs * SECOND_PROG_REPEAT)

# One "w" phoneme per frequency, lasting one beat, with the same pitch value
# at both pitch points (0 and 100).
progression_phonems = PhonemeList([])
for freq in track_freqs * FULL_LOOP_COUNT:
    progression_phonems.append(
        Phoneme("w", int(beat_duration * 1000), [(0, freq), (100, freq)])
    )

logging.info("Rendering audio")
voice = Voice(lang="fr", voice_id=3)
wav = voice.to_audio(progression_phonems)

if BEATS_TRACK_FILE is not None:
    # Mix the scaled beats (0.6) with the scaled voice (0.4) and re-encode to WAV.
    rate, wave_array = to_f32_16k(wav)
    mixed_tracks = mix_tracks(beats_track_looped * 0.6, wave_array * 0.4, align="left")
    wav = to_wav_bytes(mixed_tracks, 16000)

player = AudioPlayer()
player.set_file(BytesIO(wav))
player.play()
player.close()